Skip to content
Snippets Groups Projects
Commit 5c226ffd authored by Eike Cochu's avatar Eike Cochu
Browse files

updated configuration

rewrote config file, using reflection to fill field values
added config command to print current configuration
parent fa749921
No related branches found
No related tags found
No related merge requests found
Showing
with 240 additions and 62 deletions
......@@ -35,6 +35,9 @@ public class CmdOptions extends Options {
public static final String OPT_SILENT = "s";
public static final String OPT_SILENT_LONG = "silent";
public static final String OPT_CONFIG = "o";
public static final String OPT_CONFIG_LONG = "config";
public CmdOptions() {
addOption(Option.builder(OPT_HELP).longOpt(OPT_HELP_LONG).desc("print this message").build());
addOption(Option.builder(OPT_SHELL).longOpt(OPT_SHELL_LONG).hasArg(true).argName("name")
......@@ -48,6 +51,7 @@ public class CmdOptions extends Options {
addOption(Option.builder(OPT_DEBUG).longOpt(OPT_DEBUG_LONG).desc("show debug information").build());
addOption(Option.builder(OPT_TEST).longOpt(OPT_TEST_LONG).desc("system tests").build());
addOption(Option.builder(OPT_SILENT).longOpt(OPT_SILENT_LONG).desc("mute all output").build());
addOption(Option.builder(OPT_CONFIG).longOpt(OPT_CONFIG_LONG).desc("show configuration").build());
}
public void printHelp(String cmd) {
......
......@@ -9,6 +9,7 @@ import static de.vipra.cmd.CmdOptions.OPT_SHELL;
import static de.vipra.cmd.CmdOptions.OPT_SILENT;
import static de.vipra.cmd.CmdOptions.OPT_STATS;
import static de.vipra.cmd.CmdOptions.OPT_TEST;
import static de.vipra.cmd.CmdOptions.OPT_CONFIG;
import java.util.ArrayList;
import java.util.List;
......@@ -33,6 +34,7 @@ import com.mongodb.MongoTimeoutException;
import de.vipra.cmd.option.ClearCommand;
import de.vipra.cmd.option.Command;
import de.vipra.cmd.option.ConfigCommand;
import de.vipra.cmd.option.ImportCommand;
import de.vipra.cmd.option.StatsCommand;
import de.vipra.cmd.option.TestCommand;
......@@ -94,6 +96,9 @@ public class Main {
// get commands
List<Command> commands = new ArrayList<>();
if (cline.hasOption(OPT_CONFIG))
commands.add(new ConfigCommand());
if (cline.hasOption(OPT_TEST))
commands.add(new TestCommand());
......
......@@ -7,7 +7,6 @@ import java.io.IOException;
import de.vipra.cmd.ex.FilebaseException;
import de.vipra.cmd.model.ProcessedArticle;
import de.vipra.util.Config;
import de.vipra.util.Config.Key;
import de.vipra.util.Constants;
import de.vipra.util.ex.ConfigException;
......@@ -65,7 +64,7 @@ public abstract class Filebase implements Closeable {
public static Filebase getFilebase(Config config) throws FilebaseException, ConfigException {
File dataDir = config.getDataDirectory();
switch (Constants.Analyzer.fromString(config.getString(Key.ANALYZER))) {
switch (config.analyzer) {
case JGIBB:
case DEFAULT:
default:
......
......@@ -4,8 +4,6 @@ import java.util.List;
import de.vipra.cmd.ex.LDAAnalyzerException;
import de.vipra.util.Config;
import de.vipra.util.Config.Key;
import de.vipra.util.Constants;
import de.vipra.util.ConvertStream;
import de.vipra.util.WordMap;
import de.vipra.util.model.TopicFull;
......@@ -50,7 +48,7 @@ public abstract class LDAAnalyzer {
public static LDAAnalyzer getAnalyzer(Config config, WordMap wordMap) throws LDAAnalyzerException {
LDAAnalyzer analyzer = null;
switch (Constants.Analyzer.fromString(config.getString(Key.ANALYZER))) {
switch (config.analyzer) {
case JGIBB:
case DEFAULT:
default:
......
package de.vipra.cmd.option;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import de.vipra.util.Config;
/**
 * Command that shows the currently loaded configuration.
 *
 * <p>
 * A heading is written through the {@code shellout} logger; the configuration
 * entries themselves are written directly to {@code System.out}.
 */
public class ConfigCommand implements Command {

    public static final Logger log = LogManager.getLogger("shellout");

    /**
     * Fetches the configuration via {@link Config#getConfig()}, logs a heading
     * and prints one line per configuration entry to standard output.
     *
     * @throws Exception
     *             if the configuration cannot be obtained
     */
    @Override
    public void run() throws Exception {
        // Obtain the configuration first so that nothing is logged when loading fails.
        final Config current = Config.getConfig();
        final String linePrefix = "";
        final String keyValueSeparator = " : ";

        log.info("Current configuration:");
        // Padding enabled, applied after the key, using blanks as the pad character.
        current.print(System.out, linePrefix, keyValueSeparator, true, true, ' ');
    }
}
......@@ -115,7 +115,7 @@ public class ImportCommand implements Command {
article.fromJSON(obj);
// preprocess text and generate text statistics
ProcessedText processedText = preprocessor.preprocess(article.getText());
ProcessedText processedText = preprocessor.process(article.getText());
ArticleStats articleStats = ArticleStats.generateFromText(processedText.getText(), wordMap);
// add article to mongodb
......@@ -124,8 +124,10 @@ public class ImportCommand implements Command {
article = dbArticles.createSingle(article);
// add words
if (config.saveAllWords) {
for (String word : processedText.getWords())
wordMap.add(word);
}
// add article to filebase
filebase.add(article);
......@@ -170,12 +172,14 @@ public class ImportCommand implements Command {
@Override
public void run() throws Exception {
config = Config.getConfig();
@SuppressWarnings("unused")
Config asd = Config.getConfig();
dbArticles = DatabaseService.getDatabaseService(config, ProcessedArticle.class);
dbTopics = DatabaseService.getDatabaseService(config, TopicFull.class);
dbWords = DatabaseService.getDatabaseService(config, Word.class);
dbImports = DatabaseService.getDatabaseService(config, Import.class);
filebase = Filebase.getFilebase(config);
preprocessor = Processor.getPreprocessor(config);
preprocessor = Processor.getProcessor(config);
wordMap = new WordMap(dbWords);
analyzer = LDAAnalyzer.getAnalyzer(config, wordMap);
elasticClient = ESClient.getClient(config);
......
......@@ -28,7 +28,7 @@ public class CoreNLPProcessor extends Processor {
}
@Override
public ProcessedText preprocess(String input) throws PreprocessorException {
public ProcessedText process(String input) throws PreprocessorException {
Annotation doc = new Annotation(input.toLowerCase());
nlp.annotate(doc);
StringBuilder sb = new StringBuilder();
......
......@@ -4,7 +4,6 @@ import java.util.List;
import de.vipra.cmd.ex.PreprocessorException;
import de.vipra.util.Config;
import de.vipra.util.Config.Key;
import de.vipra.util.Constants;
public abstract class Processor {
......@@ -19,12 +18,12 @@ public abstract class Processor {
return name;
}
public abstract ProcessedText preprocess(String input) throws PreprocessorException;
public abstract ProcessedText process(String input) throws PreprocessorException;
public static Processor getPreprocessor(Config config) {
public static Processor getProcessor(Config config) {
List<String> stopWords = Constants.STOPWORDS;
switch (Constants.Processor.fromString(config.getString(Key.PREPROCESSOR))) {
switch (config.processor) {
case CORENLP:
return new CoreNLPProcessor(stopWords);
default:
......
db.host=localhost
db.port=27017
db.name=test
fb.path=/home/eike/.local/share/vipra
\ No newline at end of file
tm.processor=corenlp
tm.analyzer=jgibb
tm.saveallwords=false
\ No newline at end of file
db.host=localhost
db.port=27017
db.name=test
fb.path=/home/eike/.local/share/vipra
\ No newline at end of file
tm.processor=corenlp
tm.analyzer=jgibb
tm.saveallwords=false
\ No newline at end of file
......@@ -38,4 +38,8 @@
</div>
</div>
<div class="row">
{{filter}}
</div>
</div>
\ No newline at end of file
......@@ -3,44 +3,93 @@ package de.vipra.util;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.Map.Entry;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import de.vipra.util.Constants.Analyzer;
import de.vipra.util.Constants.Processor;
import de.vipra.util.an.ConfigKey;
import de.vipra.util.ex.ConfigException;
import de.vipra.util.model.Model;
import de.vipra.util.service.DatabaseService;
public class Config {
public static enum Key {
DBHOST("db.host", Constants.DB_HOST),
DBPORT("db.port", Constants.DB_PORT),
DBNAME("db.name", Constants.DB_NAME),
DATADIR("fs.datadir", null),
PREPROCESSOR("an.preprocessor", Constants.Processor.DEFAULT.name),
ANALYZER("an.analyzer", Constants.Analyzer.DEFAULT.name),
STOPWORDS("an.stopwords", "");
/*
* Configuration keys
*/
private final String name;
private final Object defVal;
@ConfigKey("db.host")
public String databaseHost = Constants.DB_HOST;
Key(String name, Object defVal) {
this.name = name;
this.defVal = defVal;
}
}
@ConfigKey("db.port")
public int databasePort = Constants.DB_PORT;
@ConfigKey("db.name")
public String databaseName = Constants.DB_NAME;
@ConfigKey("tm.processor")
public Processor processor = Constants.Processor.DEFAULT;
@ConfigKey("tm.analyzer")
public Analyzer analyzer = Constants.Analyzer.DEFAULT;
@ConfigKey("tm.saveallwords")
public boolean saveAllWords = Constants.SAVE_ALL_WORDS;
/*
* Configuration reader
*/
public static final Logger log = LoggerFactory.getLogger(Config.class);
private static Config config;
private static Set<Entry<String, Field>> fields;
private static int printMaxFieldNameLength = 0;
static {
Map<String, Field> foundFields = new HashMap<>();
for (Field field : Config.class.getDeclaredFields()) {
int modifiers = field.getModifiers();
if (!Modifier.isStatic(modifiers)) {
field.setAccessible(true);
ConfigKey ck = field.getDeclaredAnnotation(ConfigKey.class);
if (ck == null)
continue;
String name = ck.value();
if (name == null || name.isEmpty()) {
name = field.getName();
}
if (name.length() > printMaxFieldNameLength)
printMaxFieldNameLength = name.length();
foundFields.put(name, field);
}
}
fields = foundFields.entrySet();
}
private final Properties props = new Properties();
@SuppressWarnings({ "unchecked", "rawtypes" })
private Config() throws IOException, ConfigException {
InputStream in = null;
// load config from environment
// config from environment
String configPath = System.getenv("VIPRA_CONFIG");
if (configPath != null && configPath.length() > 0) {
File file = new File(configPath);
......@@ -49,7 +98,7 @@ public class Config {
}
}
// load config from generic config dir
// config from generic config dir
File configDir = getGenericConfigDir();
if (configDir != null && configDir.exists() && configDir.isDirectory()) {
File file = new File(configDir, Constants.CONFIG_FILE);
......@@ -58,42 +107,61 @@ public class Config {
}
}
// load config from source
// config from source
if (in == null) {
in = FileUtils.getResource(Constants.CONFIG_FILE);
}
load(in);
}
private void load(InputStream is) throws ConfigException, IOException {
if (is == null) {
// load config
if (in == null) {
log.error("config file input stream is null");
throw new ConfigException("config file input stream is null");
} else {
props.load(is);
}
props.load(in);
}
public String getString(Key key) {
return props.getProperty(key.name, (String) key.defVal);
}
public Integer getInt(Key key) {
// read values
for (Entry<String, Field> entry : fields) {
String value = props.getProperty(entry.getKey());
if (value != null) {
Object parsedValue = null;
try {
return Integer.parseInt(props.getProperty(key.name));
} catch (NumberFormatException e) {
return (Integer) key.defVal;
switch (entry.getValue().getType().getSimpleName()) {
case "int":
case "Integer":
parsedValue = Integer.parseInt(value);
break;
case "double":
case "Double":
parsedValue = Double.parseDouble(value);
break;
case "float":
case "Float":
parsedValue = Float.parseFloat(value);
break;
case "boolean":
case "Boolean":
parsedValue = Boolean.parseBoolean(value);
break;
case "String":
parsedValue = value;
break;
default:
if (Enum.class.isAssignableFrom(entry.getValue().getType())) {
parsedValue = searchEnum((Class<Enum>) entry.getValue().getType(), value);
}
break;
}
entry.getValue().set(this, parsedValue);
} catch (Exception e) {
log.error("could not read config value " + entry.getKey(), e);
}
}
}
}
public File getDataDirectory() throws ConfigException {
String path = getString(Key.DATADIR);
File dataDir;
if (path != null) {
dataDir = new File(path);
} else {
dataDir = getGenericDataDir();
}
File dataDir = getGenericDataDir();
if (!dataDir.exists()) {
if (!dataDir.mkdirs()) {
......@@ -113,6 +181,45 @@ public class Config {
return DatabaseService.getDatabaseService(this, clazz);
}
/**
 * Prints out the current configuration values, one {@code key value} line
 * per configuration entry.
 *
 * <p>
 * The stream is flushed after printing but never closed, so passing
 * {@code System.out} is safe. If a value cannot be read reflectively, the
 * key is still printed and the failure is shown in place of the value
 * instead of the entry being silently dropped.
 *
 * @param out
 *            OutputStream to be used for printing. Usually System.out
 * @param prefix
 *            Line prefix, prepended to each printed line
 * @param separator
 *            The separator between the key and the value
 * @param pad
 *            set to true to pad keys for alignment. Aligns to the longest
 *            of keys
 * @param padRight
 *            set to true to pad after the key, false to pad before the key,
 *            if enabled
 * @param padChar
 *            the pad character to be used for padding, if enabled
 */
public void print(OutputStream out, String prefix, String separator, boolean pad, boolean padRight, char padChar) {
    PrintWriter pw = new PrintWriter(out);
    String padUnit = String.valueOf(padChar);
    for (Entry<String, Field> e : fields) {
        String name = e.getKey();
        String key = name + separator;
        if (pad) {
            int diff = printMaxFieldNameLength - name.length();
            if (diff > 0) {
                String fill = StringUtils.repeat(padUnit, diff);
                key = padRight ? name + fill + separator : fill + name + separator;
            }
        }
        try {
            pw.println(prefix + key + e.getValue().get(this));
        } catch (IllegalArgumentException | IllegalAccessException ex) {
            // Report the failure in-band rather than hiding the entry.
            pw.println(prefix + key + "<unreadable: " + ex + ">");
        }
    }
    pw.flush();
}
public static File getGenericDataDir() {
File base = PathUtils.appDataDir();
return new File(base, Constants.FB_DIR);
......@@ -130,4 +237,13 @@ public class Config {
return config;
}
/**
 * Looks up an enum constant by name, ignoring case.
 *
 * @param enumeration
 *            the enum class whose constants are searched
 * @param search
 *            the constant name to look for; may be {@code null}
 * @return the first constant whose {@code name()} equals {@code search}
 *         ignoring case, or {@code null} if there is no such constant,
 *         {@code search} is {@code null}, or {@code enumeration} is not an
 *         actual enum type
 */
public static <T extends Enum<?>> T searchEnum(Class<T> enumeration, String search) {
    // getEnumConstants() returns null for classes that are not enum types
    // (e.g. Enum.class itself, which satisfies the type bound).
    T[] constants = enumeration.getEnumConstants();
    if (constants == null) {
        return null;
    }
    for (T each : constants) {
        // equalsIgnoreCase is null-safe, unlike compareToIgnoreCase.
        if (each.name().equalsIgnoreCase(search)) {
            return each;
        }
    }
    return null;
}
}
......@@ -70,6 +70,12 @@ public class Constants {
*/
public static final double TOPIC_THRESHOLD = 0.01;
/**
* Set this to true to save all found words in the database. If false, will
* save only topic related words found by topic modeling.
*/
public static final boolean SAVE_ALL_WORDS = false;
/**
* Stopwords list. Extensive list of stopwords used to clean imported
* articles of the most common words before topic modeling is applied.
......
......@@ -10,7 +10,6 @@ import org.slf4j.LoggerFactory;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientOptions;
import de.vipra.util.Config.Key;
import de.vipra.util.ex.ConfigException;
public class Mongo {
......@@ -28,9 +27,9 @@ public class Mongo {
private final Datastore datastore;
private Mongo(Config config) throws ConfigException {
String host = config.getString(Key.DBHOST);
Integer port = config.getInt(Key.DBPORT);
String databaseName = config.getString(Key.DBNAME);
String host = config.databaseHost;
Integer port = config.databasePort;
String databaseName = config.databaseName;
if (host == null || port == null || databaseName == null) {
log.error("host/port/dbname missing in configuration");
......
......@@ -172,4 +172,11 @@ public class StringUtils {
return sb.toString();
}
/**
 * Concatenates {@code count} copies of {@code pattern}.
 *
 * @param pattern
 *            the text to repeat; a {@code null} pattern is appended as the
 *            four characters {@code "null"}, following
 *            {@link StringBuilder#append(String)}
 * @param count
 *            number of repetitions; zero or negative yields the empty string
 * @return the repeated text
 */
public static String repeat(String pattern, int count) {
    final StringBuilder repeated = new StringBuilder();
    for (int i = 0; i < count; i++) {
        repeated.append(pattern);
    }
    return repeated.toString();
}
}
package de.vipra.util.an;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
 * Marks a field as a configuration value and names the property key it is
 * read from. The annotation is retained at runtime so it can be discovered
 * through reflection.
 */
@Target(ElementType.FIELD)
@Retention(RetentionPolicy.RUNTIME)
public @interface ConfigKey {

    /**
     * The property key bound to the annotated field.
     *
     * @return the key, or the empty string when none was given
     */
    String value() default "";

}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment