From aac3b9e6f1caab474ff8cf65f5125f187d79760d Mon Sep 17 00:00:00 2001 From: Eike Cochu <eike@cochu.com> Date: Fri, 25 Dec 2015 15:45:52 +0100 Subject: [PATCH] updated enum handling with config keys added closeable interface for filebase classes added vocab class for filebase vocabulary --- .../main/java/de/vipra/cmd/file/Filebase.java | 27 ++++++++----- .../java/de/vipra/cmd/file/FilebaseIndex.java | 10 ++++- .../de/vipra/cmd/file/FilebaseVocabulary.java | 35 +++++++++++++++++ .../java/de/vipra/cmd/file/JGibbFilebase.java | 3 +- .../java/de/vipra/cmd/file/LdacFilebase.java | 3 +- .../de/vipra/cmd/option/ImportCommand.java | 2 +- .../java/de/vipra/cmd/text/Preprocessor.java | 9 +++-- .../main/java/de/vipra/util/Constants.java | 38 ++++++++++++++++++- 8 files changed, 107 insertions(+), 20 deletions(-) create mode 100644 vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseVocabulary.java diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java index 96714d44..4556d2c1 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java @@ -1,23 +1,27 @@ package de.vipra.cmd.file; +import java.io.Closeable; import java.io.File; import java.io.IOException; import de.vipra.cmd.ex.FilebaseException; import de.vipra.cmd.model.Article; import de.vipra.util.Config; +import de.vipra.util.Constants; import de.vipra.util.Config.Key; import de.vipra.util.ex.ConfigException; -public abstract class Filebase { +public abstract class Filebase implements Closeable { private final File dataDir; private final FilebaseIndex index; + private final FilebaseVocabulary vocab; public Filebase(File dataDir) throws FilebaseException { this.dataDir = dataDir; try { - this.index = new FilebaseIndex(new File(dataDir, "asd")); + this.index = new FilebaseIndex(new File(dataDir, Constants.INDEX_FILE)); + this.vocab = new FilebaseVocabulary(new File(dataDir, Constants.VOCAB_FILE)); } catch (IOException e) { throw new FilebaseException("could not read index: " + e.getMessage()); } @@ -27,14 +31,18 @@ public abstract class Filebase { return dataDir; } - public void writeIndex() throws IOException { - index.write(); - } - public void remove(Article article) throws FilebaseException { remove(article.getId()); } + @Override + public void close() throws IOException { + write(); + index.close(); + vocab.close(); + + } + public abstract void add(Article article) throws FilebaseException; public abstract void remove(String id) throws FilebaseException; @@ -43,10 +51,11 @@ public abstract class Filebase { public static Filebase getFilebase(Config config) throws FilebaseException, ConfigException { File dataDir = config.getDataDirectory(); - switch (config.getString(Key.ANALYZER).toLowerCase()) { - case "ldac": + switch (Constants.Analyzer.fromString(config.getString(Key.ANALYZER))) { + case LDAC: return new LdacFilebase(dataDir); - case "jgibb": + case JGIBB: + case DEFAULT: default: return new JGibbFilebase(dataDir); } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java index 662f43df..516a5caa 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java @@ -1,5 +1,6 @@ package de.vipra.cmd.file; +import java.io.Closeable; import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -8,7 +9,7 @@ import java.util.List; import de.vipra.util.Constants; import de.vipra.util.FileUtils; -public class FilebaseIndex { +public class FilebaseIndex implements Closeable { private final File file; private final List<String> index; @@ -22,7 +23,7 @@ public class FilebaseIndex { } } - public void write() throws IOException { + private void write() throws IOException { FileUtils.writeLines(file, Constants.FB_ENCODING.name(), index, null, false); } @@ -43,4 +44,9 @@ public class FilebaseIndex { return index.remove(id); } + @Override + public void close() throws IOException { + write(); + } + } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseVocabulary.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseVocabulary.java new file mode 100644 index 00000000..759850be --- /dev/null +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseVocabulary.java @@ -0,0 +1,35 @@ +package de.vipra.cmd.file; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import de.vipra.util.Constants; +import de.vipra.util.FileUtils; + +public class FilebaseVocabulary implements Closeable { + + private File file; + private List<String> vocables; + + public FilebaseVocabulary(File file) throws IOException { + this.file = file; + if (file.exists()) { + vocables = new ArrayList<>(FileUtils.readFile(file)); + } else { + vocables = new ArrayList<>(); + } + } + + private void write() throws IOException { + FileUtils.writeLines(file, Constants.FB_ENCODING.name(), vocables, null, false); + } + + @Override + public void close() throws IOException { + write(); + } + +} diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java index 1dd885ad..4332a82f 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java @@ -27,7 +27,8 @@ public class JGibbFilebase extends Filebase { @Override public void write() throws IOException { - writeIndex(); + // TODO Auto-generated method stub + } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java index d6c4bbb9..5699752c 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java @@ -27,7 +27,8 @@ public class LdacFilebase extends Filebase { @Override public void write() throws IOException { - writeIndex(); + // TODO Auto-generated method stub + } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index 5a6559ba..6396b63b 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -165,7 +165,7 @@ public class ImportCommand implements Command { long durAnalyze = timer.lap(); // write file index - filebase.writeIndex(); + filebase.close(); long durIndex = timer.lap(); out.info("imported " + articles.size() + " " + (articles.size() == 1 ? "article" : "articles")); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/Preprocessor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/Preprocessor.java index f130cddb..3de9ceb5 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/Preprocessor.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/Preprocessor.java @@ -9,7 +9,7 @@ import de.vipra.util.Constants; import de.vipra.util.Config.Key; public abstract class Preprocessor { - + public abstract String getName(); public abstract String preprocess(String input) throws PreprocessorException; @@ -20,10 +20,11 @@ public abstract class Preprocessor { stopWords = Constants.STOPWORDS; } - switch (config.getString(Key.PREPROCESSOR)) { - case "custom": + switch (Constants.Preprocessor.fromString(config.getString(Key.PREPROCESSOR))) { + case CUSTOM: return new CustomPreprocessor(stopWords); - case "lucene": + case LUCENE: + case DEFAULT: default: return new LucenePreprocessor(stopWords); } diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index c135c812..ea2c0db0 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -11,6 +11,8 @@ public class Constants { public static final Charset FB_ENCODING = StandardCharsets.UTF_8; public static final String CONFIG_FILE = "config.properties"; + public static final String INDEX_FILE = "index"; + public static final String VOCAB_FILE = "vocab"; public static final String DEFAULT_HOST = "localhost"; public static final int DEFAULT_PORT = 27017; @@ -37,24 +39,56 @@ public class Constants { public static enum Preprocessor { CUSTOM("custom"), - LUCENE("lucene"); + LUCENE("lucene"), + DEFAULT(LUCENE); public final String name; private Preprocessor(String name) { this.name = name; } + + private Preprocessor(Preprocessor def) { + this.name = def.name; + } + + public static Preprocessor fromString(String text) { + if (text != null) { + for (Preprocessor b : Preprocessor.values()) { + if (text.equalsIgnoreCase(b.name)) { + return b; + } + } + } + return DEFAULT; + } } public static enum Analyzer { LDAC("ldac"), - JGIBB("jgibb"); + JGIBB("jgibb"), + DEFAULT(JGIBB); public final String name; private Analyzer(String name) { this.name = name; } + + private Analyzer(Analyzer def) { + this.name = def.name; + } + + public static Analyzer fromString(String text) { + if (text != null) { + for (Analyzer b : Analyzer.values()) { + if (text.equalsIgnoreCase(b.name)) { + return b; + } + } + } + return DEFAULT; + } } } -- GitLab