diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java index 365fc9a44d8c1cb06b4a1aa8fffa3c683fb1a3e4..f3398bb1026057f75b27406cf21491cc5a94ac7c 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java @@ -133,10 +133,10 @@ public class Analyzer { final int topicCount = modelConfig.getkTopics(); assert topicCount > 0; - final int sequencesCount = windowIndex.size(); - assert sequencesCount > 0; + final int windowCount = windowIndex.size(); + assert windowCount > 0; - final int articlesCount = idDateIndex.size(); + final int articleCount = idDateIndex.size(); final int wordCount = wordIndex.size(); // read topic distributions @@ -146,8 +146,8 @@ public class Analyzer { throw new AnalyzerException("file not found: " + gamFile.getAbsolutePath()); in = new BufferedReader(new InputStreamReader(new FileInputStream(gamFile))); - final double[][] topicDistributions = new double[articlesCount][topicCount]; - for (int idxArticle = 0; idxArticle < articlesCount; idxArticle++) { + final double[][] topicDistributions = new double[articleCount][topicCount]; + for (int idxArticle = 0; idxArticle < articleCount; idxArticle++) { // read distributions into matrix and sum double topicDistributionSum = 0; for (int idxTopic = 0; idxTopic < topicCount; idxTopic++) { @@ -166,18 +166,23 @@ public class Analyzer { // read topic definition files and create topics final TopicModelFull topicModel = new TopicModelFull(modelConfig.getName(), modelConfig); - final List<WindowFull> newWindows = new ArrayList<>(sequencesCount); - final List<SequenceFull> newSequences = new ArrayList<>(topicCount * sequencesCount); + final List<WindowFull> newWindows = new ArrayList<>(windowCount); + final List<SequenceFull> newSequences = new ArrayList<>(topicCount * windowCount); final List<TopicFull> newTopics = new ArrayList<>(topicCount); + topicModel.setWordCount(wordCount); + topicModel.setWindowCount(windowCount); + topicModel.setArticleCount(articleCount); + topicModel.setTopicCount(topicCount); + ConsoleUtils.info("vocabulary size: " + wordCount); - ConsoleUtils.info("sequences: " + sequencesCount); + ConsoleUtils.info("sequences: " + windowCount); ConsoleUtils.info("topics: " + topicCount); final boolean seqRelativeCutoff = modelConfig.getMinRelativeProbability() > 0; // create sequence windows - for (int idxSeq = 0; idxSeq < sequencesCount; idxSeq++) { + for (int idxSeq = 0; idxSeq < windowCount; idxSeq++) { final WindowFull newWindow = new WindowFull(); newWindow.setId(idxSeq); newWindow.setStartDate(windowIndex.startDate(idxSeq)); @@ -197,7 +202,7 @@ public class Analyzer { // create new topic final TopicFull newTopic = new TopicFull(); - final List<Sequence> newTopicSequences = new ArrayList<>(sequencesCount); + final List<Sequence> newTopicSequences = new ArrayList<>(windowCount); newTopic.setSequences(newTopicSequences); newTopic.setTopicModel(new TopicModel(topicModel.getId())); newTopics.add(newTopic); @@ -206,9 +211,9 @@ public class Analyzer { // read file lines into word x sequence matrix // gather maximum likeliness per sequence and per word - final double[][] likelinesses = new double[wordCount][sequencesCount]; + final double[][] likelinesses = new double[wordCount][windowCount]; for (int idxWord = 0; idxWord < wordCount; idxWord++) { - for (int idxSeq = 0; idxSeq < sequencesCount; idxSeq++) { + for (int idxSeq = 0; idxSeq < windowCount; idxSeq++) { likelinesses[idxWord][idxSeq] = Double.parseDouble(in.readLine()); } } @@ -221,12 +226,12 @@ public class Analyzer { // collect top words in each sequence for topic name final Set<TopicWord> topTopicWords = new HashSet<>(); - final double[] relevances = new double[sequencesCount]; + final double[] relevances = new double[windowCount]; double relevanceSum = 0; double prevRelevance = 0; // for each sequence - for (int idxSeq = 0, sequenceOffset = 0; idxSeq < sequencesCount; idxSeq++) { + for (int idxSeq = 0, sequenceOffset = 0; idxSeq < windowCount; idxSeq++) { // calculate relative cutoff probability final double maxSeqLikeliness = maxSeqLikelinesses[idxSeq]; final double minRelativeSeqLikeliness = modelConfig.getMinRelativeProbability() * Math.abs(maxSeqLikeliness); @@ -291,14 +296,14 @@ public class Analyzer { newTopic.setName(TopicFull.getNameFromWords(modelConfig.getTopicAutoNamingWords(), topTopicWordsList)); // calculate average - final double average = relevanceSum / sequencesCount; + final double average = relevanceSum / windowCount; newTopic.setAvgRelevance(average); // calculate variance double variance = 0; for (final double relevance : relevances) variance += Math.pow(relevance - average, 2); - newTopic.setVarRelevance(variance / sequencesCount); + newTopic.setVarRelevance(variance / windowCount); // calculate rising/falling/rising-decay relevances double risingRelevance = 0; @@ -312,7 +317,7 @@ public class Analyzer { } else { fallingRelevance += Math.abs(relevanceDiff); } - risingDecayRelevance += Math.exp(-modelConfig.getRisingDecayLambda() * (sequencesCount - idxSeq2 + 1)) * relevanceDiff; + risingDecayRelevance += Math.exp(-modelConfig.getRisingDecayLambda() * (windowCount - idxSeq2 + 1)) * relevanceDiff; } newTopic.setRisingRelevance(risingRelevance); newTopic.setFallingRelevance(fallingRelevance); @@ -356,9 +361,9 @@ public class Analyzer { // calculate divergences - final List<SimilarArticle> similarArticles = new ArrayList<>(articlesCount - 1); + final List<SimilarArticle> similarArticles = new ArrayList<>(articleCount - 1); - for (int idxArticle2 = 0; idxArticle2 < articlesCount; idxArticle2++) { + for (int idxArticle2 = 0; idxArticle2 < articleCount; idxArticle2++) { if (idxArticle == idxArticle2) continue; diff --git a/vipra-ui/app/index.html b/vipra-ui/app/index.html index c39c42bec7338c08efb1ab66f3ffd92f441efb57..d2e3ede9b09901f6e0deda67c077b0d98f6dee6e 100644 --- a/vipra-ui/app/index.html +++ b/vipra-ui/app/index.html @@ -95,8 +95,11 @@ <div class="modal-body"> <ul class="list-group" ng-show="topicModels.length"> <button type="button" class="list-group-item" ng-repeat="topicModel in topicModels" ng-click="changeTopicModel(topicModel)" ng-class="{active:rootModels.topicModel.id===topicModel.id}"> - <span class="badge" ng-bind="topicModel.modelConfig.kTopics"></span> + <span class="badge" ng-bind="topicModel.articleCount" ng-show="topicModel.articleCount" ng-attr-title="{{topicModel.articleCount + ' article(s)'}}"></span> + <span class="badge" ng-bind="topicModel.topicCount" ng-show="topicModel.topicCount" ng-attr-title="{{topicModel.topicCount + ' topic(s)'}}"></span> <span ng-bind="topicModel.id"></span> + <br ng-show="topicModel.modelConfig.description"> + <small ng-bind="topicModel.modelConfig.description"></small> </button> </ul> <p class="text-center" ng-show="loading.any"> @@ -116,7 +119,7 @@ <pre>vipra -S some_model -I data.json</pre> </li> <li> - Generate topic data: + Generate topic data and index: <pre>vipra -S some_model -Mi</pre> </li> </ol> diff --git a/vipra-ui/app/js/controllers.js b/vipra-ui/app/js/controllers.js index 1a4e7a39799c413e098c4a6ef47f88626b5ac3f3..f0975308cd97d5715392b414cf6675b1492b508c 100644 --- a/vipra-ui/app/js/controllers.js +++ b/vipra-ui/app/js/controllers.js @@ -32,7 +32,7 @@ } TopicModelFactory.query({ - fields: 'modelConfig' + fields: '_all' }, function(data) { $scope.topicModels = data; }, function(err) { diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicModelFull.java b/vipra-util/src/main/java/de/vipra/util/model/TopicModelFull.java index ad5572ab112f3f7aa426e76a86e8f8593419d936..ead4c87e29ac95dbbce1d1c23558a248390c54a7 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicModelFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicModelFull.java @@ -18,7 +18,13 @@ public class TopicModelFull implements Model<String>, Comparable<TopicModelFull> @Id private String id; - private String description; + private int topicCount; + + private int articleCount; + + private int wordCount; + + private int windowCount; @Embedded @QueryIgnore(multi = true) @@ -45,12 +51,36 @@ public class TopicModelFull implements Model<String>, Comparable<TopicModelFull> this.id = id; } - public String getDescription() { - return description; + public int getTopicCount() { + return topicCount; + } + + public void setTopicCount(int topicCount) { + this.topicCount = topicCount; + } + + public int getArticleCount() { + return articleCount; + } + + public void setArticleCount(int articleCount) { + this.articleCount = articleCount; + } + + public int getWordCount() { + return wordCount; + } + + public void setWordCount(int wordCount) { + this.wordCount = wordCount; + } + + public int getWindowCount() { + return windowCount; } - public void setDescription(final String description) { - this.description = description; + public void setWindowCount(int windowCount) { + this.windowCount = windowCount; } public TopicModelConfig getModelConfig() { diff --git a/vipra-util/src/main/java/de/vipra/util/service/MongoService.java b/vipra-util/src/main/java/de/vipra/util/service/MongoService.java index 99cafa807da5b397d6db5fb394ae752e4d5581da..0f881b0d018ad724fb105461bfbb03f69b6eac3f 100644 --- a/vipra-util/src/main/java/de/vipra/util/service/MongoService.java +++ b/vipra-util/src/main/java/de/vipra/util/service/MongoService.java @@ -102,7 +102,7 @@ public class MongoService<Type extends Model<IdType>, IdType> implements Service } else { query.retrievedFields(false, fields); } - } else if (ignoredFieldsMultiQuery.length > 0) { + } else if (!builder.isAllFields() && ignoredFieldsMultiQuery.length > 0) { query.retrievedFields(false, ignoredFieldsMultiQuery); } } else if (ignoredFieldsMultiQuery.length > 0) { diff --git a/vipra-util/src/main/java/de/vipra/util/service/Service.java b/vipra-util/src/main/java/de/vipra/util/service/Service.java index 839e6989561dce0b3cf09c93b3f4e77941e8f72f..e791c8d0c76a35da12a6d87a5c230270b7f3971f 100644 --- a/vipra-util/src/main/java/de/vipra/util/service/Service.java +++ b/vipra-util/src/main/java/de/vipra/util/service/Service.java @@ -175,6 +175,7 @@ public interface Service<Type extends Model<IdType>, IdType, E extends Exception private List<Tuple<String, Object>> criteria; private String[] fields; private boolean include; + private boolean allFields; private QueryBuilder() {} @@ -265,6 +266,13 @@ public interface Service<Type extends Model<IdType>, IdType, E extends Exception */ public QueryBuilder fields(final boolean include, final String... strings) { this.include = include; + for (String field : strings) { + if (field.equalsIgnoreCase("_all")) { + this.allFields = true; + this.fields = null; + return this; + } + } this.fields = strings; return this; } @@ -295,6 +303,10 @@ public interface Service<Type extends Model<IdType>, IdType, E extends Exception return fields; } + public boolean isAllFields() { + return allFields; + } + } }