From 98a4ca756b90b73cf27d9392cc99eee268b18f56 Mon Sep 17 00:00:00 2001 From: Eike Cochu <eike@cochu.com> Date: Sun, 24 Apr 2016 21:23:46 +0200 Subject: [PATCH] updated explorer, added count matrix updated explorer split screen updated sequence dropdown, optional cancel button added matrix and countmatrix for topic similarities --- vipra-cmd/runcfg/CMD.launch | 2 +- .../main/java/de/vipra/cmd/lda/Analyzer.java | 45 +++++++++++-- .../de/vipra/cmd/option/EditModelCommand.java | 2 + .../html/directives/sequence-dropdown.html | 3 +- vipra-ui/app/html/explorer.html | 9 ++- vipra-ui/app/js/controllers.js | 10 +++ vipra-ui/app/js/directives.js | 11 ++- vipra-ui/app/less/app.less | 7 +- .../main/java/de/vipra/util/Constants.java | 6 ++ .../main/java/de/vipra/util/CountMatrix.java | 14 ++++ .../src/main/java/de/vipra/util/Matrix.java | 67 +++++++++++++++++++ .../main/java/de/vipra/util/model/Topic.java | 2 +- .../java/de/vipra/util/model/TopicFull.java | 14 +++- .../de/vipra/util/model/TopicModelConfig.java | 13 +++- 14 files changed, 189 insertions(+), 16 deletions(-) create mode 100644 vipra-util/src/main/java/de/vipra/util/CountMatrix.java create mode 100644 vipra-util/src/main/java/de/vipra/util/Matrix.java diff --git a/vipra-cmd/runcfg/CMD.launch b/vipra-cmd/runcfg/CMD.launch index bb49f0c2..7b13eb9c 100644 --- a/vipra-cmd/runcfg/CMD.launch +++ b/vipra-cmd/runcfg/CMD.launch @@ -11,7 +11,7 @@ </listAttribute> <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> -<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-CS test -C asd"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-S test2 -M"/> <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> <stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-ea"/> diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java index eb184d95..793694a8 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java @@ -23,6 +23,7 @@ import de.vipra.util.ArrayUtils; import de.vipra.util.CompareMap; import de.vipra.util.Config; import de.vipra.util.ConsoleUtils; +import de.vipra.util.CountMatrix; import de.vipra.util.MongoUtils; import de.vipra.util.StringUtils; import de.vipra.util.ex.ConfigException; @@ -351,9 +352,11 @@ public class Analyzer { newTopic.setRisingDecayRelevance(risingDecayRelevance); } - // create topic references and store document similarities + // create topic references, get document and topic similarities + final CountMatrix<ObjectId, ObjectId> topicShareMatrix = new CountMatrix<>(); int idxArticle = -1; + for (final FilebaseIDDateIndexEntry entry : idDateIndex) { idxArticle++; @@ -375,6 +378,12 @@ public class Analyzer { } } + // count topic/topic share per article + + for (final TopicShare topicRef1 : newTopicRefs) + for (final TopicShare topicRef2 : newTopicRefs) + topicShareMatrix.count(topicRef1.getTopic().getId(), topicRef2.getTopic().getId()); + // calculate article divergences final List<SimilarArticle> similarArticles = new ArrayList<>(articleCount - 1); @@ -423,13 +432,33 @@ public class Analyzer { } // remove unreferenced topics - + for (ListIterator<TopicFull> iter = newTopics.listIterator(); iter.hasNext();) { TopicFull topic = iter.next(); if (topic.getArticlesCount() == 0) iter.remove(); } + // calculate topic similarities + + int topicMinCount = (int) Math.ceil(topicCount * (1 - modelConfig.getMaxSimilarTopicsDivergence())); + + for (TopicFull topic1 : newTopics) { + final List<TopicShare> similarTopics = new ArrayList<>(); + for (TopicFull topic2 : newTopics) { + if (!topic1.getId().equals(topic2.getId())) { + Integer count = topicShareMatrix.get(topic1.getId(), topic2.getId()); + if (count != null && count >= topicMinCount) { + final TopicShare newTopicShare = new TopicShare(); + newTopicShare.setTopic(new Topic(topic2.getId())); + newTopicShare.setShare((double) count / topicCount); + similarTopics.add(newTopicShare); + } + } + } + topic1.setSimilarTopics(similarTopics); + } + // recreate entities final QueryBuilder builder = QueryBuilder.builder().eq("topicModel", new TopicModel(modelConfig.getName())); @@ -446,10 +475,18 @@ public class Analyzer { private int printProgress(final int tenthPercent, final double progress, final int iteration, final int maxIterationsLength, final long remainingNanos, final TopicModelConfig modelConfig, final int lastLength) { - final String msg = " [" + StringUtils.repeat("#", tenthPercent) + StringUtils.repeat(" ", 10 - tenthPercent) + "] " + String msg = " [" + StringUtils.repeat("#", tenthPercent) + StringUtils.repeat(" ", 10 - tenthPercent) + "] " + StringUtils.pad(Integer.toString((int) Math.floor(progress)), 3, true) + "% (" + StringUtils.pad(Integer.toString(iteration), maxIterationsLength, true) + "/" + modelConfig.getDynamicMinIterations() + "-" - + modelConfig.getDynamicMaxIterations() + ") " + StringUtils.timeString(remainingNanos, false, true, false) + "\r"; + + modelConfig.getDynamicMaxIterations() + ") " + StringUtils.timeString(remainingNanos, false, true, false); + + // add padding if shorter than last message to clear rest of line + if (msg.length() < lastLength) + msg += StringUtils.repeat(" ", lastLength - msg.length()); + + // add carriage return to rewrite next line + msg += "\r"; + ConsoleUtils.infoNOLF(msg); return msg.length() - 1; } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java index 11c2c87e..43cfbf84 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java @@ -51,6 +51,8 @@ public class EditModelCommand implements Command { topicModelConfig.setRisingDecayLambda(ConsoleUtils.readDouble("rising decay lambda", topicModelConfig.getRisingDecayLambda())); topicModelConfig.setMaxSimilarDocumentsDivergence( ConsoleUtils.readDouble("max similar documents divergence", topicModelConfig.getMaxSimilarDocumentsDivergence(), 0.0, 1.0, true)); + topicModelConfig.setMaxSimilarTopicsDivergence( + ConsoleUtils.readDouble("max similar topics divergence", topicModelConfig.getMaxSimilarTopicsDivergence(), 0.0, 1.0, true)); topicModelConfig .setWindowResolution(ConsoleUtils.readEnum(WindowResolution.class, "window resolution", topicModelConfig.getWindowResolution())); topicModelConfig.setProcessorMode(ConsoleUtils.readEnum(ProcessorMode.class, "processor mode", topicModelConfig.getProcessorMode())); diff --git a/vipra-ui/app/html/directives/sequence-dropdown.html b/vipra-ui/app/html/directives/sequence-dropdown.html index 17e05326..09cddc2d 100644 --- a/vipra-ui/app/html/directives/sequence-dropdown.html +++ b/vipra-ui/app/html/directives/sequence-dropdown.html @@ -1,5 +1,6 @@ -<ol class="nya-bs-select nya-bs-condensed" ng-model="ngModel" ng-class="{dropup:dropup}"> +<ol class="nya-bs-select nya-bs-condensed" ng-model="ngModel" ng-class="{dropup:showDropup}"> <li value="{{sequence.id}}" class="nya-bs-option" ng-repeat="sequence in sequences"> <a ng-bind="sequence.label"></a> </li> </ol> +<button class="btn btn-sm btn-default" ng-click="doClear()" ng-show="showClear">Clear</button> \ No newline at end of file diff --git a/vipra-ui/app/html/explorer.html b/vipra-ui/app/html/explorer.html index 9c87e856..9f467b8f 100644 --- a/vipra-ui/app/html/explorer.html +++ b/vipra-ui/app/html/explorer.html @@ -25,7 +25,7 @@ <li ng-repeat="topic in topics | orderBy:explorerModels.sorttopics:explorerModels.sortdir | filter:search" ng-mouseenter="highlightSeries(topic.id, true)" ng-mouseleave="highlightSeries(topic.id, false)" ng-class="{selected:topic.selected}" class="text-muted"> <div class="checkbox checkbox-condensed"> <span class="valuebar" ng-style="{width:topic.topicCurrValue}"></span> - <input tabindex="0" type="checkbox" ng-model="topic.selected" ng-attr-id="relevance-{{::topic.id}}" ng-change="redrawGraph()"> + <input tabindex="0" type="checkbox" ng-model="topic.selected" ng-attr-id="relevance-{{::topic.id}}" ng-change="changeSelectedTopics()"> <label class="check" ng-attr-for="relevance-{{::topic.id}}"> <topic-menu topic="topic" class="menu-button" /> <span class="ellipsis" ng-attr-title="{{::topic.name}}"> @@ -91,7 +91,12 @@ </ul> </div> <div class="col-xs-9 col-md-10 center"> - + <div class="wrapper"> + <div class="topbar"> + <small>Sequence:</small> + <sequence-dropdown ng-model="explorerModels.sequenceId" sequences="explorerModels.activeTopic.sequences" clear="true"></sequence-dropdown> + </div> + </div> </div> </div> </div> \ No newline at end of file diff --git a/vipra-ui/app/js/controllers.js b/vipra-ui/app/js/controllers.js index ce951ed1..396553cf 100644 --- a/vipra-ui/app/js/controllers.js +++ b/vipra-ui/app/js/controllers.js @@ -566,6 +566,16 @@ t.selected = toggle ? !t.selected : to; } $scope.redrawGraph(); + $scope.changeSelectedTopics(); + }; + + $scope.changeSelectedTopics = function() { + if($scope.explorerModels.activeTopic && !$scope.explorerModels.activeTopic.selected) { + delete $scope.explorerModels.activeTopic; + delete $scope.explorerModels.sequenceId; + } + + $scope.redrawGraph(); }; $scope.redrawGraph = function() { diff --git a/vipra-ui/app/js/directives.js b/vipra-ui/app/js/directives.js index 1aac8d1a..cc2fada9 100644 --- a/vipra-ui/app/js/directives.js +++ b/vipra-ui/app/js/directives.js @@ -257,10 +257,13 @@ scope: { ngModel: '=', sequences: '=', - dropup: '@' + dropup: '@', + clear: '@' }, link: function($scope) { - $scope.dropup = $scope.dropup === 'true'; + $scope.showDropup = $scope.dropup === 'true'; + $scope.showClear = $scope.clear === 'true'; + $scope.$watch('sequences', function(newValue) { if (newValue) { for (var i = 0, s; i < $scope.sequences.length; i++) { @@ -269,6 +272,10 @@ } } }); + + $scope.doClear = function() { + delete $scope.ngModel; + }; }, templateUrl: '/html/directives/sequence-dropdown.html' }; diff --git a/vipra-ui/app/less/app.less b/vipra-ui/app/less/app.less index 3df7c073..d1a4c835 100644 --- a/vipra-ui/app/less/app.less +++ b/vipra-ui/app/less/app.less @@ -163,9 +163,10 @@ a:hover { .nya-bs-condensed { width: auto !important; - margin-top: -2px; .dropdown-toggle { - padding: 0px 25px 0px 12px; + padding: 5px 25px 5px 10px; + font-size: 12px; + line-height: 1.5; } .dropdown-menu li a { padding: 2px 12px; @@ -504,7 +505,7 @@ entity-menu { } .seq-head-foot { - height: 42px; + height: 51px; } .table-compare { diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index a4084fd7..ed5c39e9 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -107,6 +107,12 @@ public class Constants { */ public static final double MAX_SIMILAR_DOCUMENTS_DIVERGENCE = 0.25; + /** + * Maximum divergence between a topic and similar topics. Lower values mean + * more similar topics (less divergence). Default 0.25. + */ + public static final double MAX_SIMILAR_TOPICS_DIVERGENCE = 0.25; + /** * Dynamic minimum iterations. Used for dynamic topic modeling. Default 100. */ diff --git a/vipra-util/src/main/java/de/vipra/util/CountMatrix.java b/vipra-util/src/main/java/de/vipra/util/CountMatrix.java new file mode 100644 index 00000000..7177ddc2 --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/CountMatrix.java @@ -0,0 +1,14 @@ +package de.vipra.util; + +public class CountMatrix<T, U> extends Matrix<T, U, Integer> { + + public void count(T t, U u) { + Integer i = get(t, u); + if (i == null) + i = 1; + else + i++; + put(t, u, i); + } + +} diff --git a/vipra-util/src/main/java/de/vipra/util/Matrix.java b/vipra-util/src/main/java/de/vipra/util/Matrix.java new file mode 100644 index 00000000..adfb4644 --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/Matrix.java @@ -0,0 +1,67 @@ +package de.vipra.util; + +import java.util.HashMap; +import java.util.Map; + +public class Matrix<T, U, V> { + + private final Map<T, Map<U, V>> rowMap; + private final Map<U, Map<T, V>> colMap; + + private int startRowSize = 10; + private int startColSize = 10; + + public Matrix() { + rowMap = new HashMap<>(); + colMap = new HashMap<>(); + } + + public Matrix(int rowSize, int colSize) { + rowMap = new HashMap<>(rowSize); + colMap = new HashMap<>(colSize); + startRowSize = rowSize; + startColSize = colSize; + } + + public V put(T t, U u, V v) { + Map<U, V> row = rowMap.get(t); + Map<T, V> col = colMap.get(u); + V oldV = null; + if (row == null) { + row = new HashMap<>(startRowSize); + rowMap.put(t, row); + } else { + oldV = row.get(u); + } + if (col == null) { + col = new HashMap<>(startColSize); + colMap.put(u, col); + } + row.put(u, v); + col.put(t, v); + return oldV; + } + + public V get(T t, U u) { + Map<U, V> subMap = rowMap.get(t); + if (subMap == null) + return null; + return subMap.get(u); + } + + public int size() { + int size = 0; + for (Map<U, V> subMap : rowMap.values()) + size += subMap.size(); + return size; + } + + public Map<U, V> row(T t) { + return rowMap.get(t); + } + + public Map<T, V> col(U u) { + return colMap.get(u); + } + +} diff --git a/vipra-util/src/main/java/de/vipra/util/model/Topic.java b/vipra-util/src/main/java/de/vipra/util/model/Topic.java index c373e009..a52d6ca6 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Topic.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Topic.java @@ -23,7 +23,7 @@ public class Topic implements Model<ObjectId>, Serializable { private String name; - private int articlesCount; + private Integer articlesCount; public Topic() {} diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java index 337e3b88..9bd32d02 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java @@ -43,6 +43,10 @@ public class TopicFull implements Model<ObjectId>, Serializable { @QueryIgnore(multi = true) private List<TopicWord> words; + @Embedded + @QueryIgnore(multi = true) + private List<TopicShare> similarTopics; + @QueryIgnore(multi = true) private Double avgRelevance; @@ -58,7 +62,7 @@ public class TopicFull implements Model<ObjectId>, Serializable { @QueryIgnore(multi = true) private Double risingDecayRelevance; - private int articlesCount; + private Integer articlesCount; private Date created; @@ -110,6 +114,14 @@ public class TopicFull implements Model<ObjectId>, Serializable { this.words = words; } + public List<TopicShare> getSimilarTopics() { + return similarTopics; + } + + public void setSimilarTopics(List<TopicShare> similarTopics) { + this.similarTopics = similarTopics; + } + public Double getAvgRelevance() { return avgRelevance; } diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java b/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java index aa6fe9f1..d369d5ef 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java @@ -41,6 +41,7 @@ public class TopicModelConfig implements Serializable { private double minRelativeProbability = Constants.MIN_RELATIVE_PROB; private double risingDecayLambda = Constants.RISING_DECAY_LAMBDA; private double maxSimilarDocumentsDivergence = Constants.MAX_SIMILAR_DOCUMENTS_DIVERGENCE; + private double maxSimilarTopicsDivergence = Constants.MAX_SIMILAR_TOPICS_DIVERGENCE; private WindowResolution windowResolution = Constants.WINDOW_RESOLUTION; private ProcessorMode processorMode = Constants.PROCESSOR_MODE; @@ -62,6 +63,7 @@ public class TopicModelConfig implements Serializable { minRelativeProbability = topicModelConfig.getMinRelativeProbability(); risingDecayLambda = topicModelConfig.getRisingDecayLambda(); maxSimilarDocumentsDivergence = topicModelConfig.getMaxSimilarDocumentsDivergence(); + maxSimilarTopicsDivergence = topicModelConfig.getMaxSimilarTopicsDivergence(); windowResolution = topicModelConfig.getWindowResolution(); processorMode = topicModelConfig.getProcessorMode(); } @@ -202,6 +204,14 @@ public class TopicModelConfig implements Serializable { this.maxSimilarDocumentsDivergence = maxSimilarDocumentsDivergence; } + public double getMaxSimilarTopicsDivergence() { + return maxSimilarTopicsDivergence; + } + + public void setMaxSimilarTopicsDivergence(final double maxSimilarTopicsDivergence) { + this.maxSimilarTopicsDivergence = maxSimilarTopicsDivergence; + } + public WindowResolution getWindowResolution() { return windowResolution; } @@ -248,7 +258,8 @@ public class TopicModelConfig implements Serializable { + "\n documentMinimumWordFrequency: " + documentMinimumWordFrequency + "\n spotlightSupport: " + spotlightSupport + "\n spotlightConfidence: " + spotlightConfidence + "\n minTopicShare: " + minTopicShare + "\n minRelativeProbability: " + minRelativeProbability + "\n risingDecayLambda: " + risingDecayLambda + "\n maxSimilarDocumentsDivergence: " - + maxSimilarDocumentsDivergence + "\n windowResolution: " + windowResolution + "\n processorMode: " + processorMode; + + maxSimilarDocumentsDivergence + "\n maxSimilarTopicsDivergence: " + maxSimilarTopicsDivergence + "\n windowResolution: " + + windowResolution + "\n processorMode: " + processorMode; } } -- GitLab