diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java index 8f24a846fad295283532986a6c767486745ba7e8..57775a174799df32a197e30ae759310aa9e20209 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java @@ -344,18 +344,6 @@ public class Analyzer { newTopic.setRisingDecayRelevance(risingDecayRelevance); } - // recreate windows, sequences and topics - - final QueryBuilder builder = QueryBuilder.builder().criteria("topicModel", new TopicModel(modelConfig.getName())); - - dbWindows.deleteMultiple(builder); - dbSequences.deleteMultiple(builder); - dbTopics.deleteMultiple(builder); - - dbWindows.createMultiple(newWindows); - dbSequences.createMultiple(newSequences); - dbTopics.createMultiple(newTopics); - // create topic references and store document similarities int idxArticle = -1; @@ -369,10 +357,11 @@ public class Analyzer { double reducedShare = 0; final List<TopicShare> newTopicRefs = new ArrayList<>(topicCount); for (int idxTopic = 0; idxTopic < topicCount; idxTopic++) { - if (topicDistribution[idxTopic] > 0.01) { + if (topicDistribution[idxTopic] >= modelConfig.getMinTopicShare()) { reducedShare += topicDistribution[idxTopic]; final TopicShare newTopicRef = new TopicShare(); final TopicFull topicFull = newTopics.get(idxTopic); + topicFull.setArticlesCount(topicFull.getArticlesCount() + 1); newTopicRef.setTopic(new Topic(topicFull.getId())); newTopicRef.setShare(topicDistribution[idxTopic]); newTopicRefs.add(newTopicRef); @@ -419,13 +408,25 @@ public class Analyzer { article.setSimilarArticles(similarArticles); try { - dbArticles.updateSingle(article, "topicModel", "topics", "similarArticles"); + dbArticles.updateSingle(article, "topicModel", "topics", "topicsCount", "similarArticles"); } catch (final DatabaseException e) { ConsoleUtils.error(e); } } } + // recreate entities + + final QueryBuilder builder = 
QueryBuilder.builder().criteria("topicModel", new TopicModel(modelConfig.getName())); + + dbWindows.deleteMultiple(builder); + dbSequences.deleteMultiple(builder); + dbTopics.deleteMultiple(builder); + + dbWindows.createMultiple(newWindows); + dbSequences.createMultiple(newSequences); + dbTopics.createMultiple(newTopics); + topicModel.setLastGenerated(new Date()); dbTopicModels.replaceSingle(topicModel); } diff --git a/vipra-ui/app/html/articles/index.html b/vipra-ui/app/html/articles/index.html index fd3ac02b48c9e494a6886f48b5cc1414cda5628e..93d1d6648d9e9557e6d4bdd2929b3b361b5dcfe9 100644 --- a/vipra-ui/app/html/articles/index.html +++ b/vipra-ui/app/html/articles/index.html @@ -16,6 +16,7 @@ <li value="title" class="nya-bs-option"><a>Title</a></li> <li value="date" class="nya-bs-option"><a>Date</a></li> <li value="created" class="nya-bs-option"><a>Added</a></li> + <li value="topicsCount" class="nya-bs-option"><a># of topics</a></li> </ol> <sort-dir ng-model="articlesIndexModels.sortdir" /> </span> @@ -25,6 +26,7 @@ <tr ng-repeat="article in articles"> <td> <a ui-sref="articles.show({id: article.id})" ng-bind="::article.title"></a> + <span class="badge pull-right" ng-bind="::article.topicsCount" ng-attr-title="{{::article.topicsCount}} topic(s)"></span> </td> </tr> </tbody> diff --git a/vipra-ui/app/html/articles/show.html b/vipra-ui/app/html/articles/show.html index 2367cbe1a275b03bd36928438acfb8ed4ee028ff..fd64d83c9a44295f2ea93bcb29e0372a25b2d9c2 100644 --- a/vipra-ui/app/html/articles/show.html +++ b/vipra-ui/app/html/articles/show.html @@ -42,21 +42,21 @@ </tr> <tr> <th>Word count</th> - <td ng-bind="::article.stats.wordCount"></td> + <td ng-bind-template="{{::article.stats.wordCount}} ({{::article.stats.processedWordCount}} ↓ {{::Vipra.toPercent(article.stats.reductionRatio)}}%)" ng-attr-title="{{::article.stats.wordCount}} words in this article, {{::article.stats.processedWordCount}} after cleaning ({{::Vipra.toPercent(article.stats.reductionRatio)}}% 
reduction)"></td> </tr> </tbody> </table> <h3>Topics</h3> - <table class="table table-bordered table-condensed"> + <table class="table table-bordered table-condensed" ng-show="article.topics.length"> <thead> <tr> <th class="infocol" ng-model="articlesShowModels.topicsSort" sort-by="share">Share</th> - <th ng-model="articlesShowModels.topicsSort" sort-by="name">Name</th> + <th ng-model="articlesShowModels.topicsSort" sort-by="topic.name">Name</th> <th style="width:1px"></th> </tr> </thead> <tbody> - <tr ng-repeat="topic in article.topics | orderBy:articlesShowModels.topicsSort"> + <tr ng-repeat="topic in article.topics | orderBy:articlesShowModels.topicsSort" ng-mouseenter="highlightSlice(topic.topic.id, true)" ng-mouseleave="highlightSlice(topic.topic.id, false)"> <td class="text-right" ng-bind-template="{{(topic.share*100).toFixed(0)}}%"></td> <td> <topic-link topic="topic.topic" /> @@ -67,7 +67,7 @@ </tr> </tbody> </table> - <span class="text-muted" ng-hide="article.topics.length > 0">No topics</span> + <p class="text-muted" ng-hide="article.topics.length">No topics</p> </div> <div class="col-md-4"> <h3>Share</h3> @@ -75,22 +75,23 @@ </div> </div> <h3>Similar articles</h3> - <table class="table table-bordered table-condensed"> + <table class="table table-bordered table-condensed" ng-show="article.similarArticles.length"> <thead> <tr> - <th class="infocol" ng-model="articlesShowModels.similarSort" sort-by="divergence">Share</th> + <th class="infocol" ng-model="articlesShowModels.similarSort" sort-by="share">Share</th> <th ng-model="articlesShowModels.similarSort" sort-by="article.title">Title</th> </tr> </thead> <tbody> <tr ng-repeat="simArticle in article.similarArticles | orderBy:articlesShowModels.similarSort"> - <td class="text-right" ng-bind-template="{{((1-simArticle.divergence)*100).toFixed(0)}}%"></td> + <td class="text-right" ng-bind-template="{{::simArticle.share}}%"></td> <td> <a ui-sref="articles.show({id: simArticle.article.id})" 
ng-attr-title="{{::simArticle.article.title}}" ng-bind="::simArticle.article.title"></a> </td> </tr> </tbody> </table> + <p class="text-muted" ng-hide="article.similarArticles.length">No similar articles.</p> <hr> <div class="text-justify" ng-bind-html="::article.text"></div> </div> @@ -100,25 +101,33 @@ <div class="col-md-12"> <div class="panel panel-default"> <div class="panel-heading"> - Found - <ng-pluralize count="words.length||0" when="{0:'no words',1:'1 word',other:'{} words'}"></ng-pluralize> in the database. + Found <ng-pluralize count="allWords.length||0" when="{0:'no words',1:'1 word',other:'{} unique words'}"></ng-pluralize> for this article.<br> + Article has <ng-pluralize count="article.stats.wordCount||0" when="{0:'no words',1:'1 word',other:'{} words'}"></ng-pluralize>, <span ng-bind-template="{{::article.stats.processedWordCount}} after cleaning ({{::Vipra.toPercent(article.stats.reductionRatio)}}% reduction)"></span>. </div> - <table class="table table-bordered table-condensed"> + <table class="table table-bordered table-condensed table-fixed"> <thead> <tr> <th ng-model="articlesShowModels.wordsSort" sort-by="word">Word</th> <th ng-model="articlesShowModels.wordsSort" sort-by="count">Count</th> + <th>Share</th> + <th>Reduced share</th> </tr> </thead> <tbody> <tr ng-repeat="word in words | orderBy:articlesShowModels.wordsSort"> <td> - <word-link word="word" /> + <word-link word="::word" /> </td> - <td ng-bind="word.count"></td> + <td ng-bind="::word.count"></td> + <td ng-bind-template="{{::Vipra.toPercent(word.count/article.stats.wordCount, 2)}}%"></td> + <td ng-bind-template="{{::Vipra.toPercent(word.count/article.stats.processedWordCount, 2)}}%"></td> </tr> </tbody> </table> + <div class="panel-footer"> + <ng-pluralize count="words.length" when="{0:'No words',1:'First word',other:'First {} words'}"></ng-pluralize>. 
+ <button class="btn btn-default btn-sm" ng-click="showMoreWords()" ng-show="words.length<allWords.length">Show more</button> + </div> </div> </div> </div> diff --git a/vipra-ui/app/html/partials/topic-popover.html b/vipra-ui/app/html/partials/topic-popover.html index e1b09ea5fbb01e5b7d44a95618bf3c740e8fed03..9fb94f3b0cfa269f0a0da4e89d8fdf02dbb3ddc2 100644 --- a/vipra-ui/app/html/partials/topic-popover.html +++ b/vipra-ui/app/html/partials/topic-popover.html @@ -1,4 +1,4 @@ -<table class="table table-bordered table-condensed table-nomargin"> +<table class="table table-bordered table-condensed nomargin"> <tbody> <tr> <th class="text-center">μ</th> diff --git a/vipra-ui/app/html/topics/index.html b/vipra-ui/app/html/topics/index.html index cf277d3968cbc33abe7814a5dd6ca579edafa91b..68d726c2dc449bf5f2b10a4192951983a05bbd20 100644 --- a/vipra-ui/app/html/topics/index.html +++ b/vipra-ui/app/html/topics/index.html @@ -24,6 +24,7 @@ <tr ng-repeat="topic in topics"> <td> <topic-link topic="topic" /> + <span class="badge pull-right" ng-bind="::topic.articlesCount" ng-attr-title="{{::topic.articlesCount}} article(s)"></span> </td> </tr> </tbody> diff --git a/vipra-ui/app/html/topics/show.html b/vipra-ui/app/html/topics/show.html index 63bcc290af0bb689d3858f853c6389cb605a0b09..387f7bda18f177dba20300c994944cfe6e64aeae 100644 --- a/vipra-ui/app/html/topics/show.html +++ b/vipra-ui/app/html/topics/show.html @@ -113,7 +113,7 @@ <small>Sequence:</small> <sequence-dropdown ng-model="sequenceId" sequences="topic.sequences"></sequence-dropdown> </div> - <table class="table table-condensed table-bordered table-hover" ng-show="sequence"> + <table class="table table-condensed table-bordered table-hover table-fixed" ng-show="sequence"> <thead> <tr> <th ng-model="topicsShowModels.seqSortWords" sort-by="word">Word</th> diff --git a/vipra-ui/app/index.html b/vipra-ui/app/index.html index c23f7998568bb048da2c4500d0e255eed6bb0604..fb7e6846ca7daaa0f0dacfd6d6abe7e6d21956a8 100644 --- 
a/vipra-ui/app/index.html +++ b/vipra-ui/app/index.html @@ -98,7 +98,7 @@ <h4 class="modal-title">Topic Models</h4> </div> <div class="modal-body"> - <ul class="list-group" ng-show="topicModels.length"> + <ul class="list-group nomargin" ng-show="topicModels.length"> <button type="button" class="list-group-item topic-model" ng-repeat="topicModel in topicModels" ng-click="changeTopicModel(topicModel)" ng-class="{'active selected-model':rootModels.topicModel.id===topicModel.id}"> <span class="badge" ng-bind="topicModel.articleCount" ng-show="topicModel.articleCount" ng-attr-title="{{topicModel.articleCount + ' article(s)'}}"></span> <span class="badge" ng-bind="topicModel.topicCount" ng-show="topicModel.topicCount" ng-attr-title="{{topicModel.topicCount + ' topic(s)'}}"></span> @@ -114,6 +114,8 @@ <p ng-hide="topicModels.length || loading.any"> No topic models in the database. Create a topic model and import data into it to begin. </p> + </div> + <div class="modal-body"> <h4>Quick start</h4> <ol> <li> diff --git a/vipra-ui/app/js/controllers.js b/vipra-ui/app/js/controllers.js index bdcb50498c99363194c421b2293fa96c65d8e53b..d466d9afb7c06207c25c94eda0d2aaac62b87d27 100644 --- a/vipra-ui/app/js/controllers.js +++ b/vipra-ui/app/js/controllers.js @@ -71,7 +71,7 @@ combo: 'e', description: 'Go to explorer', callback: function() { - if ($state.current.name !== 'explorer') + if ($scope.rootModels.topicModel && $state.current.name !== 'explorer') $state.transitionTo('explorer'); } }); @@ -80,7 +80,7 @@ combo: 'a', description: 'Go to articles', callback: function() { - if ($state.current.name !== 'articles') + if ($scope.rootModels.topicModel && $state.current.name !== 'articles') $state.transitionTo('articles'); } }); @@ -89,7 +89,7 @@ combo: 't', description: 'Go to topics', callback: function() { - if ($state.current.name !== 'topics') + if ($scope.rootModels.topicModel && $state.current.name !== 'topics') $state.transitionTo('topics'); } }); @@ -597,8 +597,8 @@ 
function($scope, $state, $stateParams, $timeout, ArticleFactory) { $scope.articlesShowModels = { - topicsSort: 'share', - similarSort: 'divergence', + topicsSort: '-share', + similarSort: '-share', wordsSort: '-count' }; @@ -611,6 +611,12 @@ $scope.articleCreated = Vipra.formatDateTime($scope.article.created); $scope.articleModified = Vipra.formatDateTime($scope.article.modified); + // calculate numeric share from divergence so orderBy sorts by value, not as text + if($scope.article.similarArticles) { + for(var i = 0; i < $scope.article.similarArticles.length; i++) + $scope.article.similarArticles[i].share = Math.round((1 - $scope.article.similarArticles[i].divergence) * 100); + } + // take topic model from article if (!angular.isObject($scope.rootModels.topicModel)) $scope.rootModels.topicModel = data.topicModel; @@ -626,7 +632,8 @@ d = { name: topics[i].topic.name, y: topics[i].share, - color: colors[i] + color: colors[i], + id: topics[i].topic.id }; topicShareSeries.push(d); @@ -656,9 +663,31 @@ id: $stateParams.id, fields: 'words' }, function(data) { - $scope.words = data.words; + $scope.allWords = data.words || []; + $scope.showMoreWords(); }); }; + + var wordsCount = 0; + $scope.showMoreWords = function() { + wordsCount += 20; + $scope.words = $scope.allWords.slice(0, wordsCount); + }; + + var topicShareChartElement = $('#topic-share'); + $scope.highlightSlice = function(id, toggle) { + var highcharts = topicShareChartElement.highcharts(); + if (!highcharts) return; + var point = highcharts.get(id); + if (!point) return; + + if (toggle) { + point.onMouseOver(); + } else { + point.onMouseOut(); + highcharts.tooltip.hide(); + } + }; } ]); diff --git a/vipra-ui/app/js/helpers.js b/vipra-ui/app/js/helpers.js index 88d0198a5be8b1770cd9c1033b994a12d6574306..8d58156a70c4dfe3afd49b2ab07f683a140fc9bf 100644 --- a/vipra-ui/app/js/helpers.js +++ b/vipra-ui/app/js/helpers.js @@ -22,10 +22,12 @@ return date.toLocaleDateString() + " " + date.toLocaleTimeString(); }; - Vipra.toPercent = function(input) { + Vipra.toPercent = 
function(input, nums) { + if (typeof input === 'undefined') + return; if (typeof input !== 'number') - input = parseInt(input, 10); + input = parseFloat(input); - return Math.round(input * 100); + return (input * 100).toFixed(isNaN(nums) ? 0 : nums); }; Vipra.createInitial = function(text) { diff --git a/vipra-ui/app/less/app.less b/vipra-ui/app/less/app.less index 0b4abd01f536f78250167c8d8a4c9b1d3b701f5f..7201c211439f0ac3c527a3f7dbdfe49ac9e59a67 100644 --- a/vipra-ui/app/less/app.less +++ b/vipra-ui/app/less/app.less @@ -19,6 +19,10 @@ a:hover { cursor: pointer; } +[sort-by]:hover { + background: #f5f5f5; +} + .heading { height: 125px; margin: 25px 0; @@ -144,7 +148,7 @@ a:hover { table-layout: fixed; } -.table-nomargin { +.nomargin { margin: 0; } @@ -420,6 +424,10 @@ word-menu { } } +.modal-body + .modal-body { + border-top: 1px solid #e5e5e5; +} + @-moz-keyframes spin { 100% { -moz-transform: rotateY(360deg); diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index e993274b308631b7bd3934ac85030c52d3e4323d..046f9cb9e45e9f0d3d2356dc891db5f543615f7f 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -84,6 +84,12 @@ public class Constants { */ public static final double RISING_DECAY_LAMBDA = 0.0; + /** + * Minimum topic share for an article. Topics with a smaller share are + * ignored. Default 0.01. + */ + public static final double MIN_TOPIC_SHARE = 0.01; + /** * Minimum probability of words. Words with lower probability are ignored. * Default 0.01. 
diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java index 692ca2ed5d8abefce8176423a32297e9f80ca4e9..96c0407fb9722640778574c0fb2ab6e39a9c2e1d 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java @@ -62,6 +62,8 @@ public class ArticleFull implements Model<ObjectId>, Serializable { @QueryIgnore(multi = true) private List<TopicShare> topics; + private int topicsCount; + @Embedded @QueryIgnore(multi = true) private List<SimilarArticle> similarArticles; @@ -173,6 +175,11 @@ public class ArticleFull implements Model<ObjectId>, Serializable { public void setTopics(final List<TopicShare> topics) { this.topics = topics; + this.topicsCount = topics == null ? 0 : topics.size(); + } + + public int getTopicsCount() { + return topicsCount; } @ElasticIndex("topics") diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java index 6b6015ecec2b904d10e809b11fce9dd03e96f1ab..c687c3b228bf545559c00c45006f3ca11ea6bd10 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java @@ -58,6 +58,8 @@ public class TopicFull implements Model<ObjectId>, Serializable { @QueryIgnore(multi = true) private Double risingDecayRelevance; + private int articlesCount; + private Date created; private Date modified; @@ -148,6 +150,14 @@ public class TopicFull implements Model<ObjectId>, Serializable { this.risingDecayRelevance = risingDecayRelevance; } + public int getArticlesCount() { + return articlesCount; + } + + public void setArticlesCount(int articlesCount) { + this.articlesCount = articlesCount; + } + public Date getCreated() { return created; } diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java 
b/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java index 88205fcedb56ae8899c89421620c339ded8f80be..58dc063a49b2f7ed0f7f584b91228c2bfbae8714 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java @@ -29,6 +29,7 @@ public class TopicModelConfig implements Serializable { private int documentMinimumWordFrequency = Constants.DOCUMENT_MIN_WORD_FREQ; private int spotlightSupport = Constants.SPOTLIGHT_SUPPORT; private double spotlightConfidence = Constants.SPOTLIGHT_CONFIDENCE; + private double minTopicShare = Constants.MIN_TOPIC_SHARE; private double minRelativeProbability = Constants.MIN_RELATIVE_PROB; private double risingDecayLambda = Constants.RISING_DECAY_LAMBDA; private double maxSimilarDocumentsDivergence = Constants.MAX_SIMILAR_DOCUMENTS_DIVERGENCE; @@ -49,6 +50,7 @@ public class TopicModelConfig implements Serializable { documentMinimumWordFrequency = topicModelConfig.getDocumentMinimumWordFrequency(); spotlightSupport = topicModelConfig.getSpotlightSupport(); spotlightConfidence = topicModelConfig.getSpotlightConfidence(); + minTopicShare = topicModelConfig.getMinTopicShare(); minRelativeProbability = topicModelConfig.getMinRelativeProbability(); risingDecayLambda = topicModelConfig.getRisingDecayLambda(); maxSimilarDocumentsDivergence = topicModelConfig.getMaxSimilarDocumentsDivergence(); @@ -160,6 +162,14 @@ public class TopicModelConfig implements Serializable { this.spotlightConfidence = spotlightConfidence; } + public double getMinTopicShare() { + return minTopicShare; + } + + public void setMinTopicShare(double minTopicShare) { + this.minTopicShare = minTopicShare; + } + public double getMinRelativeProbability() { return minRelativeProbability; }