From 840a49ee9a44a94f75d16f7d054d7c2296f65e80 Mon Sep 17 00:00:00 2001
From: Eike Cochu <eike@cochu.com>
Date: Fri, 26 Feb 2016 18:57:24 +0100
Subject: [PATCH] fixed jgibb modeling problems

fixed index file being created as a directory
added initial reverse sorting of topic references
removed unused config reference
---
 .../src/main/java/de/vipra/cmd/file/Filebase.java      |  2 +-
 .../src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java    | 10 ++++------
 .../src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java  |  3 +++
 vipra-cmd/src/main/resources/config.properties         |  2 +-
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java
index f71b73e8..dfe0148c 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java
@@ -33,7 +33,7 @@ public abstract class Filebase implements Closeable {
 			}
 		}
 		try {
-			this.index = new FilebaseIndex(getModelFile("index"));
+			this.index = new FilebaseIndex(modelDir);
 		} catch (IOException e) {
 			throw new FilebaseException("could not read index: " + e.getMessage());
 		}
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java
index 63e12813..365141a0 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java
@@ -59,12 +59,10 @@ public class DTMAnalyzer extends Analyzer {
 			this.outDirSeq = new File(outDir, "lda-seq");
 			this.vocab = new DTMVocabulary(modelDir);
 			this.sequences = new DTMSequenceIndex(modelDir);
-			index = new FilebaseIndex(modelDir);
-
-			config = Config.getConfig();
-			dbArticles = MongoService.getDatabaseService(config, ArticleFull.class);
-			dbTopics = MongoService.getDatabaseService(config, TopicFull.class);
-			dbWords = MongoService.getDatabaseService(config, Word.class);
+			this.index = new FilebaseIndex(modelDir);
+			this.dbArticles = MongoService.getDatabaseService(config, ArticleFull.class);
+			this.dbTopics = MongoService.getDatabaseService(config, TopicFull.class);
+			this.dbWords = MongoService.getDatabaseService(config, Word.class);
 		} catch (ConfigException | IOException | ParseException e) {
 			throw new AnalyzerException(e);
 		}
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java
index 98f50d2f..80961888 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java
@@ -7,6 +7,7 @@ import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map.Entry;
@@ -213,6 +214,8 @@ public class JGibbAnalyzer extends Analyzer {
 					ref.setShare((double) ref.getCount() / reducedCount);
 
 				if (!newTopicRefs.isEmpty()) {
+					Collections.sort(newTopicRefs, Comparator.reverseOrder());
+
 					// update article with topic references (partial update)
 					ArticleFull article = new ArticleFull();
 					article.setId(index.get(articleIndex++));
diff --git a/vipra-cmd/src/main/resources/config.properties b/vipra-cmd/src/main/resources/config.properties
index 80c62240..61bfe4a9 100644
--- a/vipra-cmd/src/main/resources/config.properties
+++ b/vipra-cmd/src/main/resources/config.properties
@@ -4,5 +4,5 @@ db.name=test
 es.host=localhost
 es.port=9300
 tm.processor=corenlp
-tm.analyzer=dtm
+tm.analyzer=jgibb
 tm.dtmpath=/home/eike/repos/master/dtm_release/dtm/main
\ No newline at end of file
-- 
GitLab