diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java index f71b73e8afe8e2c460cb54c7c5a3a48d454662a5..dfe0148ce6368d489d989526b7755c7d1cafd31d 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java @@ -33,7 +33,7 @@ public abstract class Filebase implements Closeable { } } try { - this.index = new FilebaseIndex(getModelFile("index")); + this.index = new FilebaseIndex(modelDir); } catch (IOException e) { throw new FilebaseException("could not read index: " + e.getMessage()); } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java index 63e128133c1add26f31b9cfa4ea07fb406d7de85..365141a0141c8183513973306331feba8bac9d4f 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java @@ -59,12 +59,10 @@ public class DTMAnalyzer extends Analyzer { this.outDirSeq = new File(outDir, "lda-seq"); this.vocab = new DTMVocabulary(modelDir); this.sequences = new DTMSequenceIndex(modelDir); - index = new FilebaseIndex(modelDir); - - config = Config.getConfig(); - dbArticles = MongoService.getDatabaseService(config, ArticleFull.class); - dbTopics = MongoService.getDatabaseService(config, TopicFull.class); - dbWords = MongoService.getDatabaseService(config, Word.class); + this.index = new FilebaseIndex(modelDir); + this.dbArticles = MongoService.getDatabaseService(config, ArticleFull.class); + this.dbTopics = MongoService.getDatabaseService(config, TopicFull.class); + this.dbWords = MongoService.getDatabaseService(config, Word.class); } catch (ConfigException | IOException | ParseException e) { throw new AnalyzerException(e); } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java index 98f50d2f4bccb7b528570b4c1c7f32b610987217..80961888bc499d5b82a86a813125a37a6cf21b4e 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Map.Entry; @@ -213,6 +214,8 @@ public class JGibbAnalyzer extends Analyzer { ref.setShare((double) ref.getCount() / reducedCount); if (!newTopicRefs.isEmpty()) { + Collections.sort(newTopicRefs, Comparator.reverseOrder()); + // update article with topic references (partial update) ArticleFull article = new ArticleFull(); article.setId(index.get(articleIndex++)); diff --git a/vipra-cmd/src/main/resources/config.properties b/vipra-cmd/src/main/resources/config.properties index 80c62240811d3f1eeb9f19f17962b32649f6a382..61bfe4a9fb5a057b497c5947af9db341ff191c81 100644 --- a/vipra-cmd/src/main/resources/config.properties +++ b/vipra-cmd/src/main/resources/config.properties @@ -4,5 +4,5 @@ db.name=test es.host=localhost es.port=9300 tm.processor=corenlp -tm.analyzer=dtm +tm.analyzer=jgibb tm.dtmpath=/home/eike/repos/master/dtm_release/dtm/main \ No newline at end of file