From 98a4ca756b90b73cf27d9392cc99eee268b18f56 Mon Sep 17 00:00:00 2001
From: Eike Cochu <eike@cochu.com>
Date: Sun, 24 Apr 2016 21:23:46 +0200
Subject: [PATCH] updated explorer, added count matrix

updated explorer split screen
updated sequence dropdown, optional clear button
added matrix and countmatrix for topic similarities
---
 vipra-cmd/runcfg/CMD.launch                   |  2 +-
 .../main/java/de/vipra/cmd/lda/Analyzer.java  | 45 +++++++++++--
 .../de/vipra/cmd/option/EditModelCommand.java |  2 +
 .../html/directives/sequence-dropdown.html    |  3 +-
 vipra-ui/app/html/explorer.html               |  9 ++-
 vipra-ui/app/js/controllers.js                | 10 +++
 vipra-ui/app/js/directives.js                 | 11 ++-
 vipra-ui/app/less/app.less                    |  7 +-
 .../main/java/de/vipra/util/Constants.java    |  6 ++
 .../main/java/de/vipra/util/CountMatrix.java  | 14 ++++
 .../src/main/java/de/vipra/util/Matrix.java   | 67 +++++++++++++++++++
 .../main/java/de/vipra/util/model/Topic.java  |  2 +-
 .../java/de/vipra/util/model/TopicFull.java   | 14 +++-
 .../de/vipra/util/model/TopicModelConfig.java | 13 +++-
 14 files changed, 189 insertions(+), 16 deletions(-)
 create mode 100644 vipra-util/src/main/java/de/vipra/util/CountMatrix.java
 create mode 100644 vipra-util/src/main/java/de/vipra/util/Matrix.java

diff --git a/vipra-cmd/runcfg/CMD.launch b/vipra-cmd/runcfg/CMD.launch
index bb49f0c2..7b13eb9c 100644
--- a/vipra-cmd/runcfg/CMD.launch
+++ b/vipra-cmd/runcfg/CMD.launch
@@ -11,7 +11,7 @@
 </listAttribute>
 <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
 <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/>
-<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-CS test -C asd"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-S test2 -M"/>
 <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/>
 <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
 <stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-ea"/>
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java
index eb184d95..793694a8 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java
@@ -23,6 +23,7 @@ import de.vipra.util.ArrayUtils;
 import de.vipra.util.CompareMap;
 import de.vipra.util.Config;
 import de.vipra.util.ConsoleUtils;
+import de.vipra.util.CountMatrix;
 import de.vipra.util.MongoUtils;
 import de.vipra.util.StringUtils;
 import de.vipra.util.ex.ConfigException;
@@ -351,9 +352,11 @@ public class Analyzer {
 			newTopic.setRisingDecayRelevance(risingDecayRelevance);
 		}
 
-		// create topic references and store document similarities
+		// create topic references, get document and topic similarities
 
+		final CountMatrix<ObjectId, ObjectId> topicShareMatrix = new CountMatrix<>();
 		int idxArticle = -1;
+
 		for (final FilebaseIDDateIndexEntry entry : idDateIndex) {
 			idxArticle++;
 
@@ -375,6 +378,12 @@ public class Analyzer {
 				}
 			}
 
+			// count topic/topic share per article
+
+			for (final TopicShare topicRef1 : newTopicRefs)
+				for (final TopicShare topicRef2 : newTopicRefs)
+					topicShareMatrix.count(topicRef1.getTopic().getId(), topicRef2.getTopic().getId());
+
 			// calculate article divergences
 
 			final List<SimilarArticle> similarArticles = new ArrayList<>(articleCount - 1);
@@ -423,13 +432,33 @@ public class Analyzer {
 		}
 
 		// remove unreferenced topics
-		
+
 		for (ListIterator<TopicFull> iter = newTopics.listIterator(); iter.hasNext();) {
 			TopicFull topic = iter.next();
 			if (topic.getArticlesCount() == 0)
 				iter.remove();
 		}
 
+		// calculate topic similarities
+
+		int topicMinCount = (int) Math.ceil(topicCount * (1 - modelConfig.getMaxSimilarTopicsDivergence()));
+
+		for (TopicFull topic1 : newTopics) {
+			final List<TopicShare> similarTopics = new ArrayList<>();
+			for (TopicFull topic2 : newTopics) {
+				if (!topic1.getId().equals(topic2.getId())) {
+					Integer count = topicShareMatrix.get(topic1.getId(), topic2.getId());
+					if (count != null && count >= topicMinCount) {
+						final TopicShare newTopicShare = new TopicShare();
+						newTopicShare.setTopic(new Topic(topic2.getId()));
+						newTopicShare.setShare((double) count / topicCount);
+						similarTopics.add(newTopicShare);
+					}
+				}
+			}
+			topic1.setSimilarTopics(similarTopics);
+		}
+
 		// recreate entities
 
 		final QueryBuilder builder = QueryBuilder.builder().eq("topicModel", new TopicModel(modelConfig.getName()));
@@ -446,10 +475,18 @@ public class Analyzer {
 
 	private int printProgress(final int tenthPercent, final double progress, final int iteration, final int maxIterationsLength,
 			final long remainingNanos, final TopicModelConfig modelConfig, final int lastLength) {
-		final String msg = " [" + StringUtils.repeat("#", tenthPercent) + StringUtils.repeat(" ", 10 - tenthPercent) + "] "
+		String msg = " [" + StringUtils.repeat("#", tenthPercent) + StringUtils.repeat(" ", 10 - tenthPercent) + "] "
 				+ StringUtils.pad(Integer.toString((int) Math.floor(progress)), 3, true) + "% ("
 				+ StringUtils.pad(Integer.toString(iteration), maxIterationsLength, true) + "/" + modelConfig.getDynamicMinIterations() + "-"
-				+ modelConfig.getDynamicMaxIterations() + ") " + StringUtils.timeString(remainingNanos, false, true, false) + "\r";
+				+ modelConfig.getDynamicMaxIterations() + ") " + StringUtils.timeString(remainingNanos, false, true, false);
+
+		// add padding if shorter than last message to clear rest of line
+		if (msg.length() < lastLength)
+			msg += StringUtils.repeat(" ", lastLength - msg.length());
+
+		// add carriage return to rewrite next line
+		msg += "\r";
+
 		ConsoleUtils.infoNOLF(msg);
 		return msg.length() - 1;
 	}
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java
index 11c2c87e..43cfbf84 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java
@@ -51,6 +51,8 @@ public class EditModelCommand implements Command {
 		topicModelConfig.setRisingDecayLambda(ConsoleUtils.readDouble("rising decay lambda", topicModelConfig.getRisingDecayLambda()));
 		topicModelConfig.setMaxSimilarDocumentsDivergence(
 				ConsoleUtils.readDouble("max similar documents divergence", topicModelConfig.getMaxSimilarDocumentsDivergence(), 0.0, 1.0, true));
+		topicModelConfig.setMaxSimilarTopicsDivergence(
+				ConsoleUtils.readDouble("max similar topics divergence", topicModelConfig.getMaxSimilarTopicsDivergence(), 0.0, 1.0, true));
 		topicModelConfig
 				.setWindowResolution(ConsoleUtils.readEnum(WindowResolution.class, "window resolution", topicModelConfig.getWindowResolution()));
 		topicModelConfig.setProcessorMode(ConsoleUtils.readEnum(ProcessorMode.class, "processor mode", topicModelConfig.getProcessorMode()));
diff --git a/vipra-ui/app/html/directives/sequence-dropdown.html b/vipra-ui/app/html/directives/sequence-dropdown.html
index 17e05326..09cddc2d 100644
--- a/vipra-ui/app/html/directives/sequence-dropdown.html
+++ b/vipra-ui/app/html/directives/sequence-dropdown.html
@@ -1,5 +1,6 @@
-<ol class="nya-bs-select nya-bs-condensed" ng-model="ngModel" ng-class="{dropup:dropup}">
+<ol class="nya-bs-select nya-bs-condensed" ng-model="ngModel" ng-class="{dropup:showDropup}">
   <li value="{{sequence.id}}" class="nya-bs-option" ng-repeat="sequence in sequences">
     <a ng-bind="sequence.label"></a>
   </li>
 </ol>
+<button class="btn btn-sm btn-default" ng-click="doClear()" ng-show="showClear">Clear</button>
\ No newline at end of file
diff --git a/vipra-ui/app/html/explorer.html b/vipra-ui/app/html/explorer.html
index 9c87e856..9f467b8f 100644
--- a/vipra-ui/app/html/explorer.html
+++ b/vipra-ui/app/html/explorer.html
@@ -25,7 +25,7 @@
         <li ng-repeat="topic in topics | orderBy:explorerModels.sorttopics:explorerModels.sortdir | filter:search" ng-mouseenter="highlightSeries(topic.id, true)" ng-mouseleave="highlightSeries(topic.id, false)" ng-class="{selected:topic.selected}" class="text-muted">
           <div class="checkbox checkbox-condensed">
             <span class="valuebar" ng-style="{width:topic.topicCurrValue}"></span>
-            <input tabindex="0" type="checkbox" ng-model="topic.selected" ng-attr-id="relevance-{{::topic.id}}" ng-change="redrawGraph()">
+            <input tabindex="0" type="checkbox" ng-model="topic.selected" ng-attr-id="relevance-{{::topic.id}}" ng-change="changeSelectedTopics()">
             <label class="check" ng-attr-for="relevance-{{::topic.id}}">
               <topic-menu topic="topic" class="menu-button" />
               <span class="ellipsis" ng-attr-title="{{::topic.name}}">
@@ -91,7 +91,12 @@
       </ul>
     </div>
     <div class="col-xs-9 col-md-10 center">
-
+      <div class="wrapper">
+        <div class="topbar">
+          <small>Sequence:</small>
+          <sequence-dropdown ng-model="explorerModels.sequenceId" sequences="explorerModels.activeTopic.sequences" clear="true"></sequence-dropdown>
+        </div>
+      </div>
     </div>
   </div>
 </div>
\ No newline at end of file
diff --git a/vipra-ui/app/js/controllers.js b/vipra-ui/app/js/controllers.js
index ce951ed1..396553cf 100644
--- a/vipra-ui/app/js/controllers.js
+++ b/vipra-ui/app/js/controllers.js
@@ -566,6 +566,18 @@
           t.selected = toggle ? !t.selected : to;
         }
-        $scope.redrawGraph();
+        // changeSelectedTopics() ends with its own redrawGraph() call
+        $scope.changeSelectedTopics();
+      };
+
+      // Forget the active topic and its sequence once that topic is no longer
+      // selected, then redraw the graph.
+      $scope.changeSelectedTopics = function() {
+        if ($scope.explorerModels.activeTopic && !$scope.explorerModels.activeTopic.selected) {
+          delete $scope.explorerModels.activeTopic;
+          delete $scope.explorerModels.sequenceId;
+        }
+
+        $scope.redrawGraph();
       };
 
       $scope.redrawGraph = function() {
diff --git a/vipra-ui/app/js/directives.js b/vipra-ui/app/js/directives.js
index 1aac8d1a..cc2fada9 100644
--- a/vipra-ui/app/js/directives.js
+++ b/vipra-ui/app/js/directives.js
@@ -257,10 +257,13 @@
       scope: {
         ngModel: '=',
         sequences: '=',
-        dropup: '@'
+        dropup: '@',
+        clear: '@'
       },
       link: function($scope) {
-        $scope.dropup = $scope.dropup === 'true';
+        $scope.showDropup = $scope.dropup === 'true';
+        $scope.showClear = $scope.clear === 'true';
+
         $scope.$watch('sequences', function(newValue) {
           if (newValue) {
             for (var i = 0, s; i < $scope.sequences.length; i++) {
@@ -269,6 +272,10 @@
             }
           }
         });
+
+        $scope.doClear = function() {
+          delete $scope.ngModel;
+        };
       },
       templateUrl: '/html/directives/sequence-dropdown.html'
     };
diff --git a/vipra-ui/app/less/app.less b/vipra-ui/app/less/app.less
index 3df7c073..d1a4c835 100644
--- a/vipra-ui/app/less/app.less
+++ b/vipra-ui/app/less/app.less
@@ -163,9 +163,10 @@ a:hover {
 
 .nya-bs-condensed {
   width: auto !important;
-  margin-top: -2px;
   .dropdown-toggle {
-    padding: 0px 25px 0px 12px;
+    padding: 5px 25px 5px 10px;
+    font-size: 12px;
+    line-height: 1.5;
   }
   .dropdown-menu li a {
     padding: 2px 12px;
@@ -504,7 +505,7 @@ entity-menu {
 }
 
 .seq-head-foot {
-  height: 42px;
+  height: 51px;
 }
 
 .table-compare {
diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java
index a4084fd7..ed5c39e9 100644
--- a/vipra-util/src/main/java/de/vipra/util/Constants.java
+++ b/vipra-util/src/main/java/de/vipra/util/Constants.java
@@ -107,6 +107,12 @@ public class Constants {
 	 */
 	public static final double MAX_SIMILAR_DOCUMENTS_DIVERGENCE = 0.25;
 
+	/**
+	 * Maximum divergence between a topic and similar topics. Lower values mean
+	 * more similar topics (less divergence). Default 0.25.
+	 */
+	public static final double MAX_SIMILAR_TOPICS_DIVERGENCE = 0.25;
+
 	/**
 	 * Dynamic minimum iterations. Used for dynamic topic modeling. Default 100.
 	 */
diff --git a/vipra-util/src/main/java/de/vipra/util/CountMatrix.java b/vipra-util/src/main/java/de/vipra/util/CountMatrix.java
new file mode 100644
index 00000000..7177ddc2
--- /dev/null
+++ b/vipra-util/src/main/java/de/vipra/util/CountMatrix.java
@@ -0,0 +1,20 @@
+package de.vipra.util;
+
+/**
+ * A {@link Matrix} whose cells hold integer counters.
+ *
+ * @param <T> row key type
+ * @param <U> column key type
+ */
+public class CountMatrix<T, U> extends Matrix<T, U, Integer> {
+
+	/**
+	 * Adds one to the counter in cell ({@code t}, {@code u}). A cell that has
+	 * not been counted before starts at 1.
+	 */
+	public void count(T t, U u) {
+		final Integer previous = get(t, u);
+		put(t, u, previous == null ? 1 : previous + 1);
+	}
+
+}
diff --git a/vipra-util/src/main/java/de/vipra/util/Matrix.java b/vipra-util/src/main/java/de/vipra/util/Matrix.java
new file mode 100644
index 00000000..adfb4644
--- /dev/null
+++ b/vipra-util/src/main/java/de/vipra/util/Matrix.java
@@ -0,0 +1,112 @@
+package de.vipra.util;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Sparse two-dimensional table mapping a (row key, column key) pair to a value.
+ * Every cell is stored twice, once in a per-row map and once in a per-column
+ * map, so whole rows and whole columns can each be fetched with one lookup.
+ *
+ * @param <T> row key type
+ * @param <U> column key type
+ * @param <V> cell value type
+ */
+public class Matrix<T, U, V> {
+
+	private static final int DEFAULT_SIZE = 10;
+
+	private final Map<T, Map<U, V>> rowMap;
+	private final Map<U, Map<T, V>> colMap;
+
+	// expected number of distinct row / column keys, used to presize the maps
+	private final int startRowSize;
+	private final int startColSize;
+
+	/**
+	 * Creates an empty matrix using the default expected sizes.
+	 */
+	public Matrix() {
+		this(DEFAULT_SIZE, DEFAULT_SIZE);
+	}
+
+	/**
+	 * Creates an empty matrix presized for the expected dimensions.
+	 *
+	 * @param rowSize expected number of distinct row keys
+	 * @param colSize expected number of distinct column keys
+	 */
+	public Matrix(int rowSize, int colSize) {
+		rowMap = new HashMap<>(rowSize);
+		colMap = new HashMap<>(colSize);
+		startRowSize = rowSize;
+		startColSize = colSize;
+	}
+
+	/**
+	 * Stores {@code v} in cell ({@code t}, {@code u}), replacing any value
+	 * already there.
+	 *
+	 * @return the value previously stored in the cell, or {@code null} if the
+	 *         cell was empty
+	 */
+	public V put(T t, U u, V v) {
+		Map<U, V> row = rowMap.get(t);
+		if (row == null) {
+			// a row has one entry per column key, so size it by the column count
+			row = new HashMap<>(startColSize);
+			rowMap.put(t, row);
+		}
+		Map<T, V> col = colMap.get(u);
+		if (col == null) {
+			// a column has one entry per row key, so size it by the row count
+			col = new HashMap<>(startRowSize);
+			colMap.put(u, col);
+		}
+		col.put(t, v);
+		return row.put(u, v);
+	}
+
+	/**
+	 * Returns the value stored in cell ({@code t}, {@code u}), or {@code null}
+	 * if the cell is empty.
+	 */
+	public V get(T t, U u) {
+		final Map<U, V> row = rowMap.get(t);
+		return row == null ? null : row.get(u);
+	}
+
+	/**
+	 * Returns the number of cells that have been set.
+	 */
+	public int size() {
+		int size = 0;
+		for (final Map<U, V> row : rowMap.values())
+			size += row.size();
+		return size;
+	}
+
+	/**
+	 * Returns a read-only view of row {@code t}, or {@code null} if no cell in
+	 * that row has been set. The view is unmodifiable because writing to it
+	 * directly would leave the column index out of sync; use
+	 * {@link #put(Object, Object, Object)} to change cells.
+	 */
+	public Map<U, V> row(T t) {
+		final Map<U, V> row = rowMap.get(t);
+		return row == null ? null : Collections.unmodifiableMap(row);
+	}
+
+	/**
+	 * Returns a read-only view of column {@code u}, or {@code null} if no cell
+	 * in that column has been set. The view is unmodifiable because writing to
+	 * it directly would leave the row index out of sync; use
+	 * {@link #put(Object, Object, Object)} to change cells.
+	 */
+	public Map<T, V> col(U u) {
+		final Map<T, V> col = colMap.get(u);
+		return col == null ? null : Collections.unmodifiableMap(col);
+	}
+
+}
diff --git a/vipra-util/src/main/java/de/vipra/util/model/Topic.java b/vipra-util/src/main/java/de/vipra/util/model/Topic.java
index c373e009..a52d6ca6 100644
--- a/vipra-util/src/main/java/de/vipra/util/model/Topic.java
+++ b/vipra-util/src/main/java/de/vipra/util/model/Topic.java
@@ -23,7 +23,7 @@ public class Topic implements Model<ObjectId>, Serializable {
 
 	private String name;
 
-	private int articlesCount;
+	private Integer articlesCount;
 
 	public Topic() {}
 
diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java
index 337e3b88..9bd32d02 100644
--- a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java
+++ b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java
@@ -43,6 +43,10 @@ public class TopicFull implements Model<ObjectId>, Serializable {
 	@QueryIgnore(multi = true)
 	private List<TopicWord> words;
 
+	@Embedded
+	@QueryIgnore(multi = true)
+	private List<TopicShare> similarTopics;
+
 	@QueryIgnore(multi = true)
 	private Double avgRelevance;
 
@@ -58,7 +62,7 @@ public class TopicFull implements Model<ObjectId>, Serializable {
 	@QueryIgnore(multi = true)
 	private Double risingDecayRelevance;
 
-	private int articlesCount;
+	private Integer articlesCount;
 
 	private Date created;
 
@@ -110,6 +114,14 @@ public class TopicFull implements Model<ObjectId>, Serializable {
 		this.words = words;
 	}
 
+	public List<TopicShare> getSimilarTopics() {
+		return similarTopics;
+	}
+
+	public void setSimilarTopics(List<TopicShare> similarTopics) {
+		this.similarTopics = similarTopics;
+	}
+
 	public Double getAvgRelevance() {
 		return avgRelevance;
 	}
diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java b/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java
index aa6fe9f1..d369d5ef 100644
--- a/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java
+++ b/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java
@@ -41,6 +41,7 @@ public class TopicModelConfig implements Serializable {
 	private double minRelativeProbability = Constants.MIN_RELATIVE_PROB;
 	private double risingDecayLambda = Constants.RISING_DECAY_LAMBDA;
 	private double maxSimilarDocumentsDivergence = Constants.MAX_SIMILAR_DOCUMENTS_DIVERGENCE;
+	private double maxSimilarTopicsDivergence = Constants.MAX_SIMILAR_TOPICS_DIVERGENCE;
 	private WindowResolution windowResolution = Constants.WINDOW_RESOLUTION;
 	private ProcessorMode processorMode = Constants.PROCESSOR_MODE;
 
@@ -62,6 +63,7 @@ public class TopicModelConfig implements Serializable {
 		minRelativeProbability = topicModelConfig.getMinRelativeProbability();
 		risingDecayLambda = topicModelConfig.getRisingDecayLambda();
 		maxSimilarDocumentsDivergence = topicModelConfig.getMaxSimilarDocumentsDivergence();
+		maxSimilarTopicsDivergence = topicModelConfig.getMaxSimilarTopicsDivergence();
 		windowResolution = topicModelConfig.getWindowResolution();
 		processorMode = topicModelConfig.getProcessorMode();
 	}
@@ -202,6 +204,14 @@ public class TopicModelConfig implements Serializable {
 		this.maxSimilarDocumentsDivergence = maxSimilarDocumentsDivergence;
 	}
 
+	public double getMaxSimilarTopicsDivergence() {
+		return maxSimilarTopicsDivergence;
+	}
+
+	public void setMaxSimilarTopicsDivergence(final double maxSimilarTopicsDivergence) {
+		this.maxSimilarTopicsDivergence = maxSimilarTopicsDivergence;
+	}
+
 	public WindowResolution getWindowResolution() {
 		return windowResolution;
 	}
@@ -248,7 +258,8 @@ public class TopicModelConfig implements Serializable {
 				+ "\n documentMinimumWordFrequency: " + documentMinimumWordFrequency + "\n spotlightSupport: " + spotlightSupport
 				+ "\n spotlightConfidence: " + spotlightConfidence + "\n minTopicShare: " + minTopicShare + "\n minRelativeProbability: "
 				+ minRelativeProbability + "\n risingDecayLambda: " + risingDecayLambda + "\n maxSimilarDocumentsDivergence: "
-				+ maxSimilarDocumentsDivergence + "\n windowResolution: " + windowResolution + "\n processorMode: " + processorMode;
+				+ maxSimilarDocumentsDivergence + "\n maxSimilarTopicsDivergence: " + maxSimilarTopicsDivergence + "\n windowResolution: "
+				+ windowResolution + "\n processorMode: " + processorMode;
 	}
 
 }
-- 
GitLab