diff --git a/vipra-cmd/.classpath b/vipra-cmd/.classpath index 40970bd0b82a9de07acfa8546c944293fd7552dd..88e7a6ba323a197c37c8f597ce644f3256af68f9 100644 --- a/vipra-cmd/.classpath +++ b/vipra-cmd/.classpath @@ -11,17 +11,6 @@ <attribute name="maven.pomderived" value="true"/> </attributes> </classpathentry> - <classpathentry kind="src" output="target/test-classes" path="src/test/java"> - <attributes> - <attribute name="optional" value="true"/> - <attribute name="maven.pomderived" value="true"/> - </attributes> - </classpathentry> - <classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources"> - <attributes> - <attribute name="maven.pomderived" value="true"/> - </attributes> - </classpathentry> <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"> <attributes> <attribute name="maven.pomderived" value="true"/> diff --git a/vipra-cmd/.settings/org.eclipse.core.resources.prefs b/vipra-cmd/.settings/org.eclipse.core.resources.prefs index 29abf999564110a0d6aca109f55f439c72b7031c..abdea9ac032d4655898933f93050f48bf9581d14 100644 --- a/vipra-cmd/.settings/org.eclipse.core.resources.prefs +++ b/vipra-cmd/.settings/org.eclipse.core.resources.prefs @@ -1,6 +1,4 @@ eclipse.preferences.version=1 encoding//src/main/java=UTF-8 encoding//src/main/resources=UTF-8 -encoding//src/test/java=UTF-8 -encoding//src/test/resources=UTF-8 encoding/<project>=UTF-8 diff --git a/vipra-cmd/build.xml b/vipra-cmd/build.xml index 9b61c8d796cb4213c1c93de6e25bfdc04e865000..7e15723ddb97d2c786aefcec39599035a1a03768 100644 --- a/vipra-cmd/build.xml +++ b/vipra-cmd/build.xml @@ -14,13 +14,15 @@ </manifest> <fileset dir="${dir.buildfile}/target/classes"/> <fileset dir="/home/eike/Repositories/fu/ss15/ma/impl/vipra-util/target/classes"/> + <fileset dir="/home/eike/Repositories/fu/ss15/ma/impl/jgibblda/target/classes"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/commons-io/commons-io/2.4/commons-io-2.4.jar"/> <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/apache/logging/log4j/log4j-api/2.4.1/log4j-api-2.4.1.jar"/> <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/apache/logging/log4j/log4j-core/2.4.1/log4j-core-2.4.1.jar"/> <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/apache/logging/log4j/log4j-slf4j-impl/2.4.1/log4j-slf4j-impl-2.4.1.jar"/> <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/slf4j/slf4j-api/1.7.12/slf4j-api-1.7.12.jar"/> - <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/mongodb/mongodb-driver/3.0.4/mongodb-driver-3.0.4.jar"/> - <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/mongodb/bson/3.0.4/bson-3.0.4.jar"/> - <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/mongodb/mongodb-driver-core/3.0.4/mongodb-driver-core-3.0.4.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/mongodb/mongodb-driver/3.2.0/mongodb-driver-3.2.0.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/mongodb/bson/3.2.0/bson-3.2.0.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/mongodb/mongodb-driver-core/3.2.0/mongodb-driver-core-3.2.0.jar"/> <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/elasticsearch/elasticsearch/2.1.0/elasticsearch-2.1.0.jar"/> <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/apache/lucene/lucene-core/5.3.1/lucene-core-5.3.1.jar"/> <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/apache/lucene/lucene-backward-codecs/5.3.1/lucene-backward-codecs-5.3.1.jar"/> @@ -52,8 +54,19 @@ <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/hdrhistogram/HdrHistogram/2.1.6/HdrHistogram-2.1.6.jar"/> <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/commons-cli/commons-cli/1.3.1/commons-cli-1.3.1.jar"/> <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/com/twitter/jsr166e/1.1.0/jsr166e-1.1.0.jar"/> - <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/commons-io/commons-io/2.4/commons-io-2.4.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/args4j/args4j/2.0.6/args4j-2.0.6.jar"/> <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/com/googlecode/json-simple/json-simple/1.1.1/json-simple-1.1.1.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/edu/stanford/nlp/stanford-corenlp/3.5.2/stanford-corenlp-3.5.2.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/com/io7m/xom/xom/1.2.10/xom-1.2.10.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/xml-apis/xml-apis/1.3.03/xml-apis-1.3.03.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/xerces/xercesImpl/2.8.0/xercesImpl-2.8.0.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/xalan/xalan/2.7.0/xalan-2.7.0.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/de/jollyday/jollyday/0.4.7/jollyday-0.4.7.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/javax/xml/bind/jaxb-api/2.2.7/jaxb-api-2.2.7.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/com/googlecode/efficient-java-matrix-library/ejml/0.23/ejml-0.23.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/javax/json/javax.json-api/1.0/javax.json-api-1.0.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/apache/lucene/lucene-core/5.4.0/lucene-core-5.4.0.jar"/> + <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/apache/lucene/lucene-analyzers-common/5.4.0/lucene-analyzers-common-5.4.0.jar"/> <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/junit/junit/4.12/junit-4.12.jar"/> <zipfileset excludes="META-INF/*.SF" src="/home/eike/.m2/repository/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar"/> </jar> diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java b/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java index f7619cd020eba22fc17e06dcb716e58910b6cb19..07fd04444b308971ea71e336f4b54cabb30bfe82 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java @@ -11,13 +11,19 @@ public class CmdOptions extends Options { public static final String OPT_HELP = "h"; public static final String OPT_HELP_LONG = "help"; + public static final String OPT_IMPORT = "i"; public static final String OPT_IMPORT_LONG = "import"; + public static final String OPT_SHELL = "x"; public static final String OPT_SHELL_LONG = "shell"; + public static final String OPT_DELETE = "d"; public static final String OPT_DELETE_LONG = "delete"; + public static final String OPT_CLEAR = "c"; + public static final String OPT_CLEAR_LONG = "clear"; + public CmdOptions() { addOption(Option.builder(OPT_HELP).longOpt(OPT_HELP_LONG).desc("print this message").build()); addOption(Option.builder(OPT_SHELL).longOpt(OPT_SHELL_LONG).hasArg(true).argName("name") @@ -28,6 +34,7 @@ public class CmdOptions extends Options { .desc("import articles into the database").build()); group.addOption(Option.builder(OPT_DELETE).longOpt(OPT_DELETE_LONG).hasArgs().argName("files/ids...") .desc("delete articles from the database").build()); + group.addOption(Option.builder(OPT_CLEAR).longOpt(OPT_CLEAR_LONG).desc("clear database and filebase").build()); addOptionGroup(group); } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java index 46197f2bbf427e671873fbdd7be25add4614b470..5ffef5a07a556656af599f07d04450616cbbc66e 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java @@ -9,6 +9,7 @@ import org.apache.commons.cli.ParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import de.vipra.cmd.option.ClearCommand; import de.vipra.cmd.option.Command; import de.vipra.cmd.option.DeleteCommand; import de.vipra.cmd.option.ImportCommand; @@ -49,6 +50,8 @@ public class Main { c = new ImportCommand(cline.getOptionValues(OPT_IMPORT)); } else if (cline.hasOption(OPT_DELETE)) { c = new DeleteCommand(cline.getOptionValues(OPT_DELETE)); + } else if (cline.hasOption(OPT_CLEAR)) { + c = new ClearCommand(); } if (c != null) { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/ex/ClearException.java b/vipra-cmd/src/main/java/de/vipra/cmd/ex/ClearException.java new file mode 100644 index 0000000000000000000000000000000000000000..9c28a8c2f29a2a40c2d3dd4daea33be60022a91b --- /dev/null +++ b/vipra-cmd/src/main/java/de/vipra/cmd/ex/ClearException.java @@ -0,0 +1,15 @@ +package de.vipra.cmd.ex; + +public class ClearException extends Exception { + + private static final long serialVersionUID = 1L; + + public ClearException(String msg) { + super(msg); + } + + public ClearException(Exception e) { + super(e); + } + +} diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java new file mode 100644 index 0000000000000000000000000000000000000000..00f49a7752c0c7e6af97ac8dc37335f21d2c081a --- /dev/null +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ClearCommand.java @@ -0,0 +1,65 @@ +package de.vipra.cmd.option; + +import java.io.File; +import java.io.IOException; + +import org.apache.commons.io.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import de.vipra.cmd.ExecutionException; +import de.vipra.cmd.ex.ClearException; +import de.vipra.cmd.model.Article; +import de.vipra.util.Config; +import de.vipra.util.ConsoleUtils; +import de.vipra.util.Constants; +import de.vipra.util.ex.ConfigException; +import de.vipra.util.service.DatabaseService; + +public class ClearCommand implements Command { + + public static final Logger log = LoggerFactory.getLogger(ClearCommand.class); + public static final Logger out = LoggerFactory.getLogger("shellout"); + + private Config config; + private DatabaseService<Article> dbArticles; + + private void clear() throws ClearException, ConfigException { + try { + config = Config.getConfig(); + dbArticles = DatabaseService.getDatabaseService(config, Constants.Collection.ARTICLES, Article.class); + } catch (Exception e) { + throw new ClearException(e); + } + + out.info("clearing database"); + dbArticles.drop(); + + out.info("clearing filebase"); + File dataDir = config.getDataDirectory(); + if (dataDir.exists() && dataDir.isDirectory()) { + try { + FileUtils.deleteDirectory(dataDir); + } catch (IOException e) { + out.warn("could not delete data directory: " + dataDir.getAbsolutePath()); + } + } + } + + @Override + public void run() throws ExecutionException { + out.info("to confirm clearing, type 'clear' and press enter"); + try { + System.out.print("> "); + String in = ConsoleUtils.readLine().toLowerCase().trim(); + if (in.equals("clear")) { + clear(); + } + } catch (IOException e) { + log.error("io error: " + e.getMessage()); + } catch (ClearException | ConfigException e) { + throw new ExecutionException(e); + } + } + +} diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index 34869e1b4fa2684b49dab0f01aacd80748707239..b23864acc06a3a0f05651659159deb6a70c1e3d6 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -154,6 +154,7 @@ public class ImportCommand implements Command { timer.start(); // import files into database and filebase + out.info("file import"); List<Article> articles = new ArrayList<>(); for (File file : files) { articles.addAll(importFile(file)); @@ -161,10 +162,12 @@ public class ImportCommand implements Command { long durImport = timer.lap(); // write filebase + out.info("writing file index"); filebase.close(); long durIndex = timer.lap(); // do topic modeling + out.info("topic modeling"); analyzer.analyze(); long durAnalyze = timer.lap(); diff --git a/vipra-cmd/src/main/resources/log4j2.xml b/vipra-cmd/src/main/resources/log4j2.xml index c28b8da1deb9d9c4c53d81fdb830230a3a86fe8b..1da679bac62e41de1aec19c1e28be2c768858d9e 100644 --- a/vipra-cmd/src/main/resources/log4j2.xml +++ b/vipra-cmd/src/main/resources/log4j2.xml @@ -9,6 +9,6 @@ <Root level="ERROR"> <AppenderRef ref="Console" /> </Root> - <Logger name="shellout" level="ALL"/> + <Logger name="shellout" level="ALL" /> </Loggers> </Configuration> \ No newline at end of file diff --git a/vipra-util/.classpath b/vipra-util/.classpath index 13c35ff075287036386ea459ee87b13c4e6be365..f0cbe96b457b57ffa31e74d3c0f87d16bb366423 100644 --- a/vipra-util/.classpath +++ b/vipra-util/.classpath @@ -6,11 +6,6 @@ <attribute name="maven.pomderived" value="true"/> </attributes> </classpathentry> - <classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources"> - <attributes> - <attribute name="maven.pomderived" value="true"/> - </attributes> - </classpathentry> <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"> <attributes> <attribute name="maven.pomderived" value="true"/> @@ -22,11 +17,5 @@ <attribute name="maven.pomderived" value="true"/> </attributes> </classpathentry> - <classpathentry kind="src" output="target/test-classes" path="src/test/java"> - <attributes> - <attribute name="optional" value="true"/> - <attribute name="maven.pomderived" value="true"/> - </attributes> - </classpathentry> <classpathentry kind="output" path="target/classes"/> </classpath> diff --git a/vipra-util/.settings/org.eclipse.core.resources.prefs b/vipra-util/.settings/org.eclipse.core.resources.prefs index abdea9ac032d4655898933f93050f48bf9581d14..e9441bb123ec3e1ab029c7eac896bc45681d9a71 100644 --- a/vipra-util/.settings/org.eclipse.core.resources.prefs +++ b/vipra-util/.settings/org.eclipse.core.resources.prefs @@ -1,4 +1,3 @@ eclipse.preferences.version=1 encoding//src/main/java=UTF-8 -encoding//src/main/resources=UTF-8 encoding/<project>=UTF-8 diff --git a/vipra-util/.settings/org.eclipse.wst.common.component b/vipra-util/.settings/org.eclipse.wst.common.component index 217de769b1c939ce8569041d55e0227a3473b779..98cb9759f376998c345804d1b8ac743309b0b3ae 100644 --- a/vipra-util/.settings/org.eclipse.wst.common.component +++ b/vipra-util/.settings/org.eclipse.wst.common.component @@ -1,6 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?><project-modules id="moduleCoreId" project-version="1.5.0"> <wb-module deploy-name="vipra-util"> <wb-resource deploy-path="/" source-path="/src/main/java"/> - <wb-resource deploy-path="/" source-path="/src/main/resources"/> </wb-module> </project-modules> diff --git a/vipra-util/src/main/java/de/vipra/util/Config.java b/vipra-util/src/main/java/de/vipra/util/Config.java index d4abb9fbff65a3fd8a76562820572fa8ffaecd7f..70892015d160d666c80fd39cc717950bcf67b07e 100644 --- a/vipra-util/src/main/java/de/vipra/util/Config.java +++ b/vipra-util/src/main/java/de/vipra/util/Config.java @@ -36,7 +36,32 @@ public class Config { private final Properties props = new Properties(); private Config() throws IOException, ConfigException { - load(FileUtils.getResource(Constants.CONFIG_FILE)); + InputStream in = null; + + // load config from environment + String configPath = System.getenv("VIPRA_CONFIG"); + if (configPath != null && configPath.length() > 0) { + File file = new File(configPath); + if (file.exists() && file.isFile()) { + in = FileUtils.openInputStream(file); + } + } + + // load config from generic config dir + File configDir = getGenericConfigDir(); + if (configDir != null && configDir.exists() && configDir.isDirectory()) { + File file = new File(configDir, "config.properties"); + if (file.exists() && file.isFile()) { + in = FileUtils.openInputStream(file); + } + } + + // load config from source + if (in == null) { + in = FileUtils.getResource(Constants.CONFIG_FILE); + } + + load(in); } private void load(InputStream is) throws ConfigException, IOException { @@ -83,6 +108,11 @@ public class Config { return new File(base, Constants.FB_DIR); } + public static File getGenericConfigDir() { + File base = PathUtils.appConfigDir(); + return new File(base, Constants.FB_DIR); + } + public static Config getConfig() throws IOException, ConfigException { if (config == null) { config = new Config(); diff --git a/vipra-util/src/main/java/de/vipra/util/ConsoleUtils.java b/vipra-util/src/main/java/de/vipra/util/ConsoleUtils.java new file mode 100644 index 0000000000000000000000000000000000000000..af55646b0eb3710f0323cf559edcc39f41538e5f --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/ConsoleUtils.java @@ -0,0 +1,14 @@ +package de.vipra.util; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; + +public class ConsoleUtils { + + public static String readLine() throws IOException { + BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); + return in.readLine(); + } + +} diff --git a/vipra-util/src/main/java/de/vipra/util/FileUtils.java b/vipra-util/src/main/java/de/vipra/util/FileUtils.java index 935541e22e9b512e5706ec89d67452449470b4cb..d9b6bbd2403d0b50079d77145c98c7774f224636 100644 --- a/vipra-util/src/main/java/de/vipra/util/FileUtils.java +++ b/vipra-util/src/main/java/de/vipra/util/FileUtils.java @@ -11,16 +11,28 @@ import java.util.List; public class FileUtils extends org.apache.commons.io.FileUtils { + public static final boolean isJAR; + + static { + String classResource = FileUtils.class.getResource("FileUtils.class").toString(); + isJAR = classResource.startsWith("jar:"); + } + public static List<String> readFile(File file) throws IOException { return Files.readAllLines(Paths.get(file.getAbsolutePath()), Constants.FB_ENCODING); } public static InputStream getResource(String name) { + while (name.startsWith("/")) + name = name.substring(1); InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream(name); if (is == null) { is = Config.class.getResourceAsStream(name); } - return is; + if (isJAR && !name.startsWith("resources") && getResource("resources/") != null) + return getResource("resources/" + name); + else + return is; } public static int countLines(File file) throws IOException { diff --git a/vipra-util/src/main/java/de/vipra/util/PathUtils.java b/vipra-util/src/main/java/de/vipra/util/PathUtils.java index dd8a00b63a5f6e035308fada36dcda7e6aa52e6b..7e5524fcfbdf300be8d1f74e4f67dd4908e3bdfa 100644 --- a/vipra-util/src/main/java/de/vipra/util/PathUtils.java +++ b/vipra-util/src/main/java/de/vipra/util/PathUtils.java @@ -18,4 +18,18 @@ public class PathUtils { return base; } + public static File appConfigDir() { + String os = System.getProperty("os.name").toUpperCase(); + File base = null; + if (os.contains("WIN")) { + base = new File(System.getProperty("APPDATA")); + } else if (os.contains("MAC")) { + base = new File(System.getProperty("user.home") + File.separator + "Library" + File.separator + + "ApplicationSupport"); + } else { + base = new File(System.getProperty("user.home") + File.separator + ".config"); + } + return base; + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java b/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java index c930da6cac33b6ff9243301ca0844c6aa20e0731..a49f30d37713e2ef0eb298054ebe3f50c8d667b8 100644 --- a/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java +++ b/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java @@ -101,6 +101,10 @@ public class DatabaseService<T extends Model> implements Service<T, DatabaseExce t.fromDocument(docNew); return result.getModifiedCount(); } + + public void drop() { + collection.drop(); + } public static <T extends Model> DatabaseService<T> getDatabaseService(Config config, Constants.Collection collection, Class<T> clazz) throws ConfigException { diff --git a/vipra-util/src/main/java/de/vipra/util/service/Service.java b/vipra-util/src/main/java/de/vipra/util/service/Service.java index 824a6a97de4b86e335b9c8b7255d7a8de5fbcfe5..a55b1aea721109f0f3dd141a266df08954a6f10e 100644 --- a/vipra-util/src/main/java/de/vipra/util/service/Service.java +++ b/vipra-util/src/main/java/de/vipra/util/service/Service.java @@ -12,4 +12,6 @@ public interface Service<T extends Model, E extends Exception> { long updateSingle(T t) throws E; + void drop(); + } diff --git a/vipra-util/src/main/resources/config.properties b/vipra-util/src/main/resources/config.properties deleted file mode 100644 index 06623e215e66a3dfaefbdf3baabce5e5aeb14412..0000000000000000000000000000000000000000 --- a/vipra-util/src/main/resources/config.properties +++ /dev/null @@ -1,5 +0,0 @@ -db.host=localhost -db.port=27017 -db.name=test - -fb.path=${datadir}/vipra \ No newline at end of file