From 840a49ee9a44a94f75d16f7d054d7c2296f65e80 Mon Sep 17 00:00:00 2001 From: Eike Cochu <eike@cochu.com> Date: Fri, 26 Feb 2016 18:57:24 +0100 Subject: [PATCH] fixed jgibb modeling problems fixed index file created as directory added topicref initial reverse sorting removed unused config reference --- .../src/main/java/de/vipra/cmd/file/Filebase.java | 2 +- .../src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java | 10 ++++------ .../src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java | 3 +++ vipra-cmd/src/main/resources/config.properties | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java index f71b73e8..dfe0148c 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java @@ -33,7 +33,7 @@ public abstract class Filebase implements Closeable { } } try { - this.index = new FilebaseIndex(getModelFile("index")); + this.index = new FilebaseIndex(modelDir); } catch (IOException e) { throw new FilebaseException("could not read index: " + e.getMessage()); } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java index 63e12813..365141a0 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java @@ -59,12 +59,10 @@ public class DTMAnalyzer extends Analyzer { this.outDirSeq = new File(outDir, "lda-seq"); this.vocab = new DTMVocabulary(modelDir); this.sequences = new DTMSequenceIndex(modelDir); - index = new FilebaseIndex(modelDir); - - config = Config.getConfig(); - dbArticles = MongoService.getDatabaseService(config, ArticleFull.class); - dbTopics = MongoService.getDatabaseService(config, TopicFull.class); - dbWords = MongoService.getDatabaseService(config, Word.class); + this.index = new FilebaseIndex(modelDir); + this.dbArticles = MongoService.getDatabaseService(config, ArticleFull.class); + this.dbTopics = MongoService.getDatabaseService(config, TopicFull.class); + this.dbWords = MongoService.getDatabaseService(config, Word.class); } catch (ConfigException | IOException | ParseException e) { throw new AnalyzerException(e); } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java index 98f50d2f..80961888 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Map.Entry; @@ -213,6 +214,8 @@ public class JGibbAnalyzer extends Analyzer { ref.setShare((double) ref.getCount() / reducedCount); if (!newTopicRefs.isEmpty()) { + Collections.sort(newTopicRefs, Comparator.reverseOrder()); + // update article with topic references (partial update) ArticleFull article = new ArticleFull(); article.setId(index.get(articleIndex++)); diff --git a/vipra-cmd/src/main/resources/config.properties b/vipra-cmd/src/main/resources/config.properties index 80c62240..61bfe4a9 100644 --- a/vipra-cmd/src/main/resources/config.properties +++ b/vipra-cmd/src/main/resources/config.properties @@ -4,5 +4,5 @@ db.name=test es.host=localhost es.port=9300 tm.processor=corenlp -tm.analyzer=dtm +tm.analyzer=jgibb tm.dtmpath=/home/eike/repos/master/dtm_release/dtm/main \ No newline at end of file -- GitLab