From 5c226ffd23be6749ed5193c6e3ebe722522fc35b Mon Sep 17 00:00:00 2001 From: Eike Cochu <eike@cochu.com> Date: Fri, 29 Jan 2016 01:48:21 +0100 Subject: [PATCH] updated configuration rewrote config file, using reflection to fill field values added config command to print current configuration --- .../main/java/de/vipra/cmd/CmdOptions.java | 4 + .../src/main/java/de/vipra/cmd/Main.java | 5 + .../main/java/de/vipra/cmd/file/Filebase.java | 3 +- .../java/de/vipra/cmd/lda/LDAAnalyzer.java | 4 +- .../de/vipra/cmd/option/ConfigCommand.java | 19 ++ .../de/vipra/cmd/option/ImportCommand.java | 12 +- .../de/vipra/cmd/text/CoreNLPProcessor.java | 2 +- .../java/de/vipra/cmd/text/Processor.java | 7 +- .../src/main/resources/config.properties | 4 +- .../src/main/resources/config.properties | 4 +- vipra-ui/app/templates/index.hbs | 4 + .../src/main/java/de/vipra/util/Config.java | 200 ++++++++++++++---- .../main/java/de/vipra/util/Constants.java | 6 + .../src/main/java/de/vipra/util/Mongo.java | 7 +- .../main/java/de/vipra/util/StringUtils.java | 7 + .../main/java/de/vipra/util/an/ConfigKey.java | 14 ++ 16 files changed, 240 insertions(+), 62 deletions(-) create mode 100644 vipra-cmd/src/main/java/de/vipra/cmd/option/ConfigCommand.java create mode 100644 vipra-util/src/main/java/de/vipra/util/an/ConfigKey.java diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java b/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java index d3d6a79f..a0bbdab4 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java @@ -35,6 +35,9 @@ public class CmdOptions extends Options { public static final String OPT_SILENT = "s"; public static final String OPT_SILENT_LONG = "silent"; + public static final String OPT_CONFIG = "o"; + public static final String OPT_CONFIG_LONG = "config"; + public CmdOptions() { addOption(Option.builder(OPT_HELP).longOpt(OPT_HELP_LONG).desc("print this message").build()); addOption(Option.builder(OPT_SHELL).longOpt(OPT_SHELL_LONG).hasArg(true).argName("name") @@ -48,6 +51,7 @@ public class CmdOptions extends Options { addOption(Option.builder(OPT_DEBUG).longOpt(OPT_DEBUG_LONG).desc("show debug information").build()); addOption(Option.builder(OPT_TEST).longOpt(OPT_TEST_LONG).desc("system tests").build()); addOption(Option.builder(OPT_SILENT).longOpt(OPT_SILENT_LONG).desc("mute all output").build()); + addOption(Option.builder(OPT_CONFIG).longOpt(OPT_CONFIG_LONG).desc("show configuration").build()); } public void printHelp(String cmd) { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java index 6f2e0794..5aa791b0 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java @@ -9,6 +9,7 @@ import static de.vipra.cmd.CmdOptions.OPT_SHELL; import static de.vipra.cmd.CmdOptions.OPT_SILENT; import static de.vipra.cmd.CmdOptions.OPT_STATS; import static de.vipra.cmd.CmdOptions.OPT_TEST; +import static de.vipra.cmd.CmdOptions.OPT_CONFIG; import java.util.ArrayList; import java.util.List; @@ -33,6 +34,7 @@ import com.mongodb.MongoTimeoutException; import de.vipra.cmd.option.ClearCommand; import de.vipra.cmd.option.Command; +import de.vipra.cmd.option.ConfigCommand; import de.vipra.cmd.option.ImportCommand; import de.vipra.cmd.option.StatsCommand; import de.vipra.cmd.option.TestCommand; @@ -94,6 +96,9 @@ public class Main { // get commands List<Command> commands = new ArrayList<>(); + if (cline.hasOption(OPT_CONFIG)) + commands.add(new ConfigCommand()); + if (cline.hasOption(OPT_TEST)) commands.add(new TestCommand()); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java index 6d7e914f..0bc63735 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java @@ -7,7 +7,6 @@ import java.io.IOException; import de.vipra.cmd.ex.FilebaseException; import de.vipra.cmd.model.ProcessedArticle; import de.vipra.util.Config; -import de.vipra.util.Config.Key; import de.vipra.util.Constants; import de.vipra.util.ex.ConfigException; @@ -65,7 +64,7 @@ public abstract class Filebase implements Closeable { public static Filebase getFilebase(Config config) throws FilebaseException, ConfigException { File dataDir = config.getDataDirectory(); - switch (Constants.Analyzer.fromString(config.getString(Key.ANALYZER))) { + switch (config.analyzer) { case JGIBB: case DEFAULT: default: diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java index d5ce849f..d69375d3 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java @@ -4,8 +4,6 @@ import java.util.List; import de.vipra.cmd.ex.LDAAnalyzerException; import de.vipra.util.Config; -import de.vipra.util.Config.Key; -import de.vipra.util.Constants; import de.vipra.util.ConvertStream; import de.vipra.util.WordMap; import de.vipra.util.model.TopicFull; @@ -50,7 +48,7 @@ public abstract class LDAAnalyzer { public static LDAAnalyzer getAnalyzer(Config config, WordMap wordMap) throws LDAAnalyzerException { LDAAnalyzer analyzer = null; - switch (Constants.Analyzer.fromString(config.getString(Key.ANALYZER))) { + switch (config.analyzer) { case JGIBB: case DEFAULT: default: diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ConfigCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ConfigCommand.java new file mode 100644 index 00000000..124827b2 --- /dev/null +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ConfigCommand.java @@ -0,0 +1,19 @@ +package de.vipra.cmd.option; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import de.vipra.util.Config; + +public class ConfigCommand implements Command { + + public static final Logger log = LogManager.getLogger("shellout"); + + @Override + public void run() throws Exception { + Config config = Config.getConfig(); + log.info("Current configuration:"); + config.print(System.out, "", " : ", true, true, ' '); + } + +} diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index 0e43bd17..07594ef8 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -115,7 +115,7 @@ public class ImportCommand implements Command { article.fromJSON(obj); // preprocess text and generate text statistics - ProcessedText processedText = preprocessor.preprocess(article.getText()); + ProcessedText processedText = preprocessor.process(article.getText()); ArticleStats articleStats = ArticleStats.generateFromText(processedText.getText(), wordMap); // add article to mongodb @@ -124,8 +124,10 @@ public class ImportCommand implements Command { article = dbArticles.createSingle(article); // add words - for (String word : processedText.getWords()) - wordMap.add(word); + if (config.saveAllWords) { + for (String word : processedText.getWords()) + wordMap.add(word); + } // add article to filebase filebase.add(article); @@ -170,12 +172,14 @@ public class ImportCommand implements Command { @Override public void run() throws Exception { config = Config.getConfig(); + @SuppressWarnings("unused") + Config asd = Config.getConfig(); dbArticles = DatabaseService.getDatabaseService(config, ProcessedArticle.class); dbTopics = DatabaseService.getDatabaseService(config, TopicFull.class); dbWords = DatabaseService.getDatabaseService(config, Word.class); dbImports = DatabaseService.getDatabaseService(config, Import.class); filebase = Filebase.getFilebase(config); - preprocessor = Processor.getPreprocessor(config); + preprocessor = Processor.getProcessor(config); wordMap = new WordMap(dbWords); analyzer = LDAAnalyzer.getAnalyzer(config, wordMap); elasticClient = ESClient.getClient(config); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java index fee641c6..b2b4605b 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/CoreNLPProcessor.java @@ -28,7 +28,7 @@ public class CoreNLPProcessor extends Processor { } @Override - public ProcessedText preprocess(String input) throws PreprocessorException { + public ProcessedText process(String input) throws PreprocessorException { Annotation doc = new Annotation(input.toLowerCase()); nlp.annotate(doc); StringBuilder sb = new StringBuilder(); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java index a74c32e1..51498e5b 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java @@ -4,7 +4,6 @@ import java.util.List; import de.vipra.cmd.ex.PreprocessorException; import de.vipra.util.Config; -import de.vipra.util.Config.Key; import de.vipra.util.Constants; public abstract class Processor { @@ -19,12 +18,12 @@ public abstract class Processor { return name; } - public abstract ProcessedText preprocess(String input) throws PreprocessorException; + public abstract ProcessedText process(String input) throws PreprocessorException; - public static Processor getPreprocessor(Config config) { + public static Processor getProcessor(Config config) { List<String> stopWords = Constants.STOPWORDS; - switch (Constants.Processor.fromString(config.getString(Key.PREPROCESSOR))) { + switch (config.processor) { case CORENLP: return new CoreNLPProcessor(stopWords); default: diff --git a/vipra-cmd/src/main/resources/config.properties b/vipra-cmd/src/main/resources/config.properties index ae859d29..0778073f 100644 --- a/vipra-cmd/src/main/resources/config.properties +++ b/vipra-cmd/src/main/resources/config.properties @@ -1,4 +1,6 @@ db.host=localhost db.port=27017 db.name=test -fb.path=/home/eike/.local/share/vipra \ No newline at end of file +tm.processor=corenlp +tm.analyzer=jgibb +tm.saveallwords=false \ No newline at end of file diff --git a/vipra-rest/src/main/resources/config.properties b/vipra-rest/src/main/resources/config.properties index ae859d29..0778073f 100644 --- a/vipra-rest/src/main/resources/config.properties +++ b/vipra-rest/src/main/resources/config.properties @@ -1,4 +1,6 @@ db.host=localhost db.port=27017 db.name=test -fb.path=/home/eike/.local/share/vipra \ No newline at end of file +tm.processor=corenlp +tm.analyzer=jgibb +tm.saveallwords=false \ No newline at end of file diff --git a/vipra-ui/app/templates/index.hbs b/vipra-ui/app/templates/index.hbs index 9f163524..88012faa 100644 --- a/vipra-ui/app/templates/index.hbs +++ b/vipra-ui/app/templates/index.hbs @@ -38,4 +38,8 @@ </div> </div> + <div class="row"> + {{filter}} + </div> + </div> \ No newline at end of file diff --git a/vipra-util/src/main/java/de/vipra/util/Config.java b/vipra-util/src/main/java/de/vipra/util/Config.java index 8bd41a0c..742aaa47 100644 --- a/vipra-util/src/main/java/de/vipra/util/Config.java +++ b/vipra-util/src/main/java/de/vipra/util/Config.java @@ -3,44 +3,93 @@ package de.vipra.util; import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; +import java.io.PrintWriter; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.Map.Entry; +import java.util.HashMap; +import java.util.Map; import java.util.Properties; +import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import de.vipra.util.Constants.Analyzer; +import de.vipra.util.Constants.Processor; +import de.vipra.util.an.ConfigKey; import de.vipra.util.ex.ConfigException; import de.vipra.util.model.Model; import de.vipra.util.service.DatabaseService; public class Config { - public static enum Key { - DBHOST("db.host", Constants.DB_HOST), - DBPORT("db.port", Constants.DB_PORT), - DBNAME("db.name", Constants.DB_NAME), - DATADIR("fs.datadir", null), - PREPROCESSOR("an.preprocessor", Constants.Processor.DEFAULT.name), - ANALYZER("an.analyzer", Constants.Analyzer.DEFAULT.name), - STOPWORDS("an.stopwords", ""); - - private final String name; - private final Object defVal; - - Key(String name, Object defVal) { - this.name = name; - this.defVal = defVal; - } - } + /* + * Configuration keys + */ + + @ConfigKey("db.host") + public String databaseHost = Constants.DB_HOST; + + @ConfigKey("db.port") + public int databasePort = Constants.DB_PORT; + + @ConfigKey("db.name") + public String databaseName = Constants.DB_NAME; + + @ConfigKey("tm.processor") + public Processor processor = Constants.Processor.DEFAULT; + + @ConfigKey("tm.analyzer") + public Analyzer analyzer = Constants.Analyzer.DEFAULT; + + @ConfigKey("tm.saveallwords") + public boolean saveAllWords = Constants.SAVE_ALL_WORDS; + + /* + * Configuration reader + */ public static final Logger log = LoggerFactory.getLogger(Config.class); private static Config config; + private static Set<Entry<String, Field>> fields; + private static int printMaxFieldNameLength = 0; + + static { + Map<String, Field> foundFields = new HashMap<>(); + + for (Field field : Config.class.getDeclaredFields()) { + int modifiers = field.getModifiers(); + if (!Modifier.isStatic(modifiers)) { + field.setAccessible(true); + + ConfigKey ck = field.getDeclaredAnnotation(ConfigKey.class); + if (ck == null) + continue; + + String name = ck.value(); + if (name == null || name.isEmpty()) { + name = field.getName(); + } + + if (name.length() > printMaxFieldNameLength) + printMaxFieldNameLength = name.length(); + + foundFields.put(name, field); + } + } + + fields = foundFields.entrySet(); + } private final Properties props = new Properties(); + @SuppressWarnings({ "unchecked", "rawtypes" }) private Config() throws IOException, ConfigException { InputStream in = null; - // load config from environment + // config from environment String configPath = System.getenv("VIPRA_CONFIG"); if (configPath != null && configPath.length() > 0) { File file = new File(configPath); @@ -49,7 +98,7 @@ public class Config { } } - // load config from generic config dir + // config from generic config dir File configDir = getGenericConfigDir(); if (configDir != null && configDir.exists() && configDir.isDirectory()) { File file = new File(configDir, Constants.CONFIG_FILE); @@ -58,42 +107,61 @@ public class Config { } } - // load config from source + // config from source if (in == null) { in = FileUtils.getResource(Constants.CONFIG_FILE); } - load(in); - } - private void load(InputStream is) throws ConfigException, IOException { - if (is == null) { + // load config + if (in == null) { log.error("config file input stream is null"); throw new ConfigException("config file input stream is null"); } else { - props.load(is); + props.load(in); } - } - public String getString(Key key) { - return props.getProperty(key.name, (String) key.defVal); - } - - public Integer getInt(Key key) { - try { - return Integer.parseInt(props.getProperty(key.name)); - } catch (NumberFormatException e) { - return (Integer) key.defVal; + // read values + for (Entry<String, Field> entry : fields) { + String value = props.getProperty(entry.getKey()); + if (value != null) { + Object parsedValue = null; + try { + switch (entry.getValue().getType().getSimpleName()) { + case "int": + case "Integer": + parsedValue = Integer.parseInt(value); + break; + case "double": + case "Double": + parsedValue = Double.parseDouble(value); + break; + case "float": + case "Float": + parsedValue = Float.parseFloat(value); + break; + case "boolean": + case "Boolean": + parsedValue = Boolean.parseBoolean(value); + break; + case "String": + parsedValue = value; + break; + default: + if (Enum.class.isAssignableFrom(entry.getValue().getType())) { + parsedValue = searchEnum((Class<Enum>) entry.getValue().getType(), value); + } + break; + } + entry.getValue().set(this, parsedValue); + } catch (Exception e) { + log.error("could not read config value " + entry.getKey(), e); + } + } } } public File getDataDirectory() throws ConfigException { - String path = getString(Key.DATADIR); - File dataDir; - if (path != null) { - dataDir = new File(path); - } else { - dataDir = getGenericDataDir(); - } + File dataDir = getGenericDataDir(); if (!dataDir.exists()) { if (!dataDir.mkdirs()) { @@ -113,6 +181,45 @@ public class Config { return DatabaseService.getDatabaseService(this, clazz); } + /** + * Prints out the current configuration values + * + * @param out + * OutputStream to be used for printing. Usually System.out + * @param prefix + * Line prefix, appended before each printed line + * @param separator + * The separator between the key and the value + * @param pad + * set to true to pad values to alignment. Aligns to the longest + * of keys + * @param padRight + * set to true to pad after the key, false to pad before the key, + * if enabled + * @param padChar + * the pad character to be used for padding, if enabled + */ + public void print(OutputStream out, String prefix, String separator, boolean pad, boolean padRight, char padChar) { + PrintWriter pw = new PrintWriter(out); + String padding = padChar + ""; + for (Entry<String, Field> e : fields) { + try { + String key = e.getKey() + separator; + if (pad) { + int diff = printMaxFieldNameLength - e.getKey().length(); + if (diff > 0) { + if (padRight) + key = e.getKey() + StringUtils.repeat(padding, diff) + separator; + else + key = StringUtils.repeat(padding, diff) + e.getKey() + separator; + } + } + pw.println(prefix + key + e.getValue().get(this)); + } catch (IllegalArgumentException | IllegalAccessException e1) {} + } + pw.flush(); + } + public static File getGenericDataDir() { File base = PathUtils.appDataDir(); return new File(base, Constants.FB_DIR); @@ -130,4 +237,13 @@ public class Config { return config; } + public static <T extends Enum<?>> T searchEnum(Class<T> enumeration, String search) { + for (T each : enumeration.getEnumConstants()) { + if (each.name().compareToIgnoreCase(search) == 0) { + return each; + } + } + return null; + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index 8c3e176f..0accb717 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -70,6 +70,12 @@ public class Constants { */ public static final double TOPIC_THRESHOLD = 0.01; + /** + * Set this to true to save all found words in the database. If false, will + * save only topic related words found by topic modeling. + */ + public static final boolean SAVE_ALL_WORDS = false; + /** * Stopwords list. Extensive list of stopwords used to clean imported * articles of the most common words before topic modeling is applied. diff --git a/vipra-util/src/main/java/de/vipra/util/Mongo.java b/vipra-util/src/main/java/de/vipra/util/Mongo.java index b38c6164..8f82187b 100644 --- a/vipra-util/src/main/java/de/vipra/util/Mongo.java +++ b/vipra-util/src/main/java/de/vipra/util/Mongo.java @@ -10,7 +10,6 @@ import org.slf4j.LoggerFactory; import com.mongodb.MongoClient; import com.mongodb.MongoClientOptions; -import de.vipra.util.Config.Key; import de.vipra.util.ex.ConfigException; public class Mongo { @@ -28,9 +27,9 @@ public class Mongo { private final Datastore datastore; private Mongo(Config config) throws ConfigException { - String host = config.getString(Key.DBHOST); - Integer port = config.getInt(Key.DBPORT); - String databaseName = config.getString(Key.DBNAME); + String host = config.databaseHost; + Integer port = config.databasePort; + String databaseName = config.databaseName; if (host == null || port == null || databaseName == null) { log.error("host/port/dbname missing in configuration"); diff --git a/vipra-util/src/main/java/de/vipra/util/StringUtils.java b/vipra-util/src/main/java/de/vipra/util/StringUtils.java index e093670a..9d82a4c2 100644 --- a/vipra-util/src/main/java/de/vipra/util/StringUtils.java +++ b/vipra-util/src/main/java/de/vipra/util/StringUtils.java @@ -172,4 +172,11 @@ public class StringUtils { return sb.toString(); } + public static String repeat(String pattern, int count) { + StringBuilder sb = new StringBuilder(); + while (count-- > 0) + sb.append(pattern); + return sb.toString(); + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/an/ConfigKey.java b/vipra-util/src/main/java/de/vipra/util/an/ConfigKey.java new file mode 100644 index 00000000..12577016 --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/an/ConfigKey.java @@ -0,0 +1,14 @@ +package de.vipra.util.an; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.FIELD) +public @interface ConfigKey { + + public String value() default ""; + +} -- GitLab