diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java index 96714d44f700c542d29e3734f1aea11e948976c1..4556d2c1817a7d9e5c3e8215eb01b8d7a79e432f 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java @@ -1,23 +1,27 @@ package de.vipra.cmd.file; +import java.io.Closeable; import java.io.File; import java.io.IOException; import de.vipra.cmd.ex.FilebaseException; import de.vipra.cmd.model.Article; import de.vipra.util.Config; +import de.vipra.util.Constants; import de.vipra.util.Config.Key; import de.vipra.util.ex.ConfigException; -public abstract class Filebase { +public abstract class Filebase implements Closeable { private final File dataDir; private final FilebaseIndex index; + private final FilebaseVocabulary vocab; public Filebase(File dataDir) throws FilebaseException { this.dataDir = dataDir; try { - this.index = new FilebaseIndex(new File(dataDir, "asd")); + this.index = new FilebaseIndex(new File(dataDir, Constants.INDEX_FILE)); + this.vocab = new FilebaseVocabulary(new File(dataDir, Constants.VOCAB_FILE)); } catch (IOException e) { throw new FilebaseException("could not read index: " + e.getMessage()); } @@ -27,14 +31,18 @@ public abstract class Filebase { return dataDir; } - public void writeIndex() throws IOException { - index.write(); - } - public void remove(Article article) throws FilebaseException { remove(article.getId()); } + @Override + public void close() throws IOException { + write(); + index.close(); + vocab.close(); + + } + public abstract void add(Article article) throws FilebaseException; public abstract void remove(String id) throws FilebaseException; @@ -43,10 +51,11 @@ public abstract class Filebase { public static Filebase getFilebase(Config config) throws FilebaseException, ConfigException { File dataDir = config.getDataDirectory(); - switch (config.getString(Key.ANALYZER).toLowerCase()) { - case "ldac": + switch (Constants.Analyzer.fromString(config.getString(Key.ANALYZER))) { + case LDAC: return new LdacFilebase(dataDir); - case "jgibb": + case JGIBB: + case DEFAULT: default: return new JGibbFilebase(dataDir); } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java index 662f43df6562def721eee32c0afa1d1a0ffc2f73..516a5caa17a60095b98b8e5b815c4256d470846a 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java @@ -1,5 +1,6 @@ package de.vipra.cmd.file; +import java.io.Closeable; import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -8,7 +9,7 @@ import java.util.List; import de.vipra.util.Constants; import de.vipra.util.FileUtils; -public class FilebaseIndex { +public class FilebaseIndex implements Closeable { private final File file; private final List<String> index; @@ -22,7 +23,7 @@ public class FilebaseIndex { } } - public void write() throws IOException { + private void write() throws IOException { FileUtils.writeLines(file, Constants.FB_ENCODING.name(), index, null, false); } @@ -43,4 +44,9 @@ public class FilebaseIndex { return index.remove(id); } + @Override + public void close() throws IOException { + write(); + } + } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseVocabulary.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseVocabulary.java new file mode 100644 index 0000000000000000000000000000000000000000..759850be3eb98a59e01423bcfe115222f5dd4298 --- /dev/null +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseVocabulary.java @@ -0,0 +1,35 @@ +package de.vipra.cmd.file; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import de.vipra.util.Constants; +import de.vipra.util.FileUtils; + +public class FilebaseVocabulary implements Closeable { + + private File file; + private List<String> vocables; + + public FilebaseVocabulary(File file) throws IOException { + this.file = file; + if (file.exists()) { + vocables = new ArrayList<>(FileUtils.readFile(file)); + } else { + vocables = new ArrayList<>(); + } + } + + private void write() throws IOException { + FileUtils.writeLines(file, Constants.FB_ENCODING.name(), vocables, null, false); + } + + @Override + public void close() throws IOException { + write(); + } + +} diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java index 1dd885ad808138387fbeeceda40b7b1fe925e187..4332a82f6f3df5d9a2b77de779bdf50e870be055 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java @@ -27,7 +27,8 @@ public class JGibbFilebase extends Filebase { @Override public void write() throws IOException { - writeIndex(); + // TODO Auto-generated method stub + } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java index d6c4bbb9221b32c905520fc42056d7ec96b2ce76..5699752c7efd26e69ff51e10bb4a5f8f6c8d0a6d 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java @@ -27,7 +27,8 @@ public class LdacFilebase extends Filebase { @Override public void write() throws IOException { - writeIndex(); + // TODO Auto-generated method stub + } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index 5a6559ba7b962dffe98daf1d97b8da876cfed938..6396b63b5414604878e024c09123ea328ad85923 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -165,7 +165,7 @@ public class ImportCommand implements Command { long durAnalyze = timer.lap(); // write file index - filebase.writeIndex(); + filebase.close(); long durIndex = timer.lap(); out.info("imported " + articles.size() + " " + (articles.size() == 1 ? "article" : "articles")); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/Preprocessor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/Preprocessor.java index f130cddb5b54c7e0263809915ffc62c340ff24eb..3de9ceb5dd3b61248177bf7cc6ea606bc41aa1f6 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/Preprocessor.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/Preprocessor.java @@ -9,7 +9,7 @@ import de.vipra.util.Constants; import de.vipra.util.Config.Key; public abstract class Preprocessor { - + public abstract String getName(); public abstract String preprocess(String input) throws PreprocessorException; @@ -20,10 +20,11 @@ public abstract class Preprocessor { stopWords = Constants.STOPWORDS; } - switch (config.getString(Key.PREPROCESSOR)) { - case "custom": + switch (Constants.Preprocessor.fromString(config.getString(Key.PREPROCESSOR))) { + case CUSTOM: return new CustomPreprocessor(stopWords); - case "lucene": + case LUCENE: + case DEFAULT: default: return new LucenePreprocessor(stopWords); } diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index c135c812feff62740b8b182b5cc282a7924d6462..ea2c0db084820a5c1d8287f211b3eac18f45af57 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -11,6 +11,8 @@ public class Constants { public static final Charset FB_ENCODING = StandardCharsets.UTF_8; public static final String CONFIG_FILE = "config.properties"; + public static final String INDEX_FILE = "index"; + public static final String VOCAB_FILE = "vocab"; public static final String DEFAULT_HOST = "localhost"; public static final int DEFAULT_PORT = 27017; @@ -37,24 +39,56 @@ public class Constants { public static enum Preprocessor { CUSTOM("custom"), - LUCENE("lucene"); + LUCENE("lucene"), + DEFAULT(LUCENE); public final String name; private Preprocessor(String name) { this.name = name; } + + private Preprocessor(Preprocessor def) { + this.name = def.name; + } + + public static Preprocessor fromString(String text) { + if (text != null) { + for (Preprocessor b : Preprocessor.values()) { + if (text.equalsIgnoreCase(b.name)) { + return b; + } + } + } + return DEFAULT; + } } public static enum Analyzer { LDAC("ldac"), - JGIBB("jgibb"); + JGIBB("jgibb"), + DEFAULT(JGIBB); public final String name; private Analyzer(String name) { this.name = name; } + + private Analyzer(Analyzer def) { + this.name = def.name; + } + + public static Analyzer fromString(String text) { + if (text != null) { + for (Analyzer b : Analyzer.values()) { + if (text.equalsIgnoreCase(b.name)) { + return b; + } + } + } + return DEFAULT; + } } }