diff --git a/vipra-backend/src/main/java/de/vipra/rest/resource/InfoResource.java b/vipra-backend/src/main/java/de/vipra/rest/resource/InfoResource.java index c999d85f56f6c87d5ad3e2c6a884f985d5dae71c..a9f0de4f831436c722de14e0dec8133acba2e15f 100644 --- a/vipra-backend/src/main/java/de/vipra/rest/resource/InfoResource.java +++ b/vipra-backend/src/main/java/de/vipra/rest/resource/InfoResource.java @@ -79,8 +79,7 @@ public class InfoResource { info.put("const.topicautoname", Constants.TOPIC_AUTO_NAMING_WORDS); info.put("const.ktopics", Constants.K_TOPICS); info.put("const.ktopicwords", Constants.K_TOPIC_WORDS); - info.put("const.minimumlike", Constants.MINIMUM_RELATIVE_PROB); - info.put("const.topicthresh", Constants.TOPIC_THRESHOLD); + info.put("const.minrelprob", Constants.MINIMUM_RELATIVE_PROB); info.put("const.dynminiter", Constants.DYNAMIC_MIN_ITER); info.put("const.dynmaxiter", Constants.DYNAMIC_MAX_ITER); info.put("const.statiter", Constants.STATIC_ITER); diff --git a/vipra-cmd/runcfg/CMD - Reread.launch b/vipra-cmd/runcfg/CMD - Reread.launch new file mode 100644 index 0000000000000000000000000000000000000000..3d3851ee426eb65a2e42586756ff487f1fab42d1 --- /dev/null +++ b/vipra-cmd/runcfg/CMD - Reread.launch @@ -0,0 +1,18 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication"> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS"> +<listEntry value="/vipra-cmd/src/main/java/de/vipra/cmd/Main.java"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES"> +<listEntry value="1"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.ui.favoriteGroups"> +<listEntry value="org.eclipse.debug.ui.launchGroup.run"/> +</listAttribute> +<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-r"/> +<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> +<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +</launchConfiguration> diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java b/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java index a7c34652663974ccf87fb0a66d3dea7b46bb0758..7498b4a2ee97e8d508c9ad3cccc171f1fef8ff11 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java @@ -44,6 +44,9 @@ public class CmdOptions extends Options { public static final String OPT_INDEXING = "e"; public static final String OPT_INDEXING_LONG = "indexing"; + public static final String OPT_REREAD = "r"; + public static final String OPT_REREAD_LONG = "reread"; + public CmdOptions() { addOption(Option.builder(OPT_HELP).longOpt(OPT_HELP_LONG).desc("print this message").build()); addOption(Option.builder(OPT_SHELL).longOpt(OPT_SHELL_LONG).hasArg(true).argName("name") @@ -60,6 +63,8 @@ public class CmdOptions extends Options { addOption(Option.builder(OPT_CONFIG).longOpt(OPT_CONFIG_LONG).desc("show configuration").build()); addOption(Option.builder(OPT_MODELING).longOpt(OPT_MODELING_LONG).desc("regenerate topic model").build()); addOption(Option.builder(OPT_INDEXING).longOpt(OPT_INDEXING_LONG).desc("regenerate search index").build()); + addOption(Option.builder(OPT_REREAD).longOpt(OPT_REREAD_LONG) + .desc("reread model files, ignored when remodeling").build()); } public void printHelp(final String cmd) { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java index c441664c9a651a61f77568d19578f772560484bc..e50d9f9bbe16219b9eb722bced3578e348c0c799 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java @@ -8,6 +8,7 @@ import static de.vipra.cmd.CmdOptions.OPT_HELP; import static de.vipra.cmd.CmdOptions.OPT_IMPORT; import static de.vipra.cmd.CmdOptions.OPT_INDEXING; import static de.vipra.cmd.CmdOptions.OPT_MODELING; +import static de.vipra.cmd.CmdOptions.OPT_REREAD; import static de.vipra.cmd.CmdOptions.OPT_SHELL; import static de.vipra.cmd.CmdOptions.OPT_SILENT; import static de.vipra.cmd.CmdOptions.OPT_STATS; @@ -111,8 +112,8 @@ public class Main { if (cline.hasOption(OPT_IMPORT)) commands.add(new ImportCommand(cline.getOptionValues(OPT_IMPORT))); - if (cline.hasOption(OPT_MODELING)) - commands.add(new ModelingCommand()); + if (cline.hasOption(OPT_MODELING) || cline.hasOption(OPT_REREAD)) + commands.add(new ModelingCommand(!cline.hasOption(OPT_MODELING) && cline.hasOption(OPT_REREAD))); if (cline.hasOption(OPT_INDEXING)) commands.add(new IndexingCommand()); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java index 8f9ce5bd5ca6a8e7838c40d9bb3d8de08681d9a0..88a31bcfe7827cb6be81cf04491abe79bda6c2d8 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java @@ -18,7 +18,7 @@ public abstract class Analyzer { public abstract void init(Config config) throws AnalyzerException; - public abstract void analyze() throws AnalyzerException; + public abstract void analyze(boolean reread) throws AnalyzerException; public static Analyzer getAnalyzer(final Config config) throws AnalyzerException { Analyzer analyzer = null; diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java index 925b61fa736eb87e5f6aacaff163eb86a57b37e7..88eb488db71c1d6f94a4703c70b1da577bdcd8c3 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java @@ -116,25 +116,29 @@ public class DTMAnalyzer extends Analyzer { } @Override - public void analyze() throws AnalyzerException { + public void analyze(final boolean reread) throws AnalyzerException { try { - final Process p = Runtime.getRuntime().exec(command, null); - if (!p.isAlive()) - throw new AnalyzerException("dtm process is dead"); - - // read from process output - BufferedReader in = new BufferedReader(new InputStreamReader(p.getErrorStream())); - - String line; - int iteration = 0; - while ((line = in.readLine()) != null) { - if (line.contains("EM iter")) { - log.info("iteration " + iteration++); + BufferedReader in; + + if (!reread) { + final Process p = Runtime.getRuntime().exec(command, null); + if (!p.isAlive()) + throw new AnalyzerException("dtm process is dead"); + + // read from process output + in = new BufferedReader(new InputStreamReader(p.getErrorStream())); + + String line; + int iteration = 0; + while ((line = in.readLine()) != null) { + if (line.contains("EM iter")) { + log.info("iteration " + iteration++); + } } - } - in.close(); - p.waitFor(); + in.close(); + p.waitFor(); + } final int wordCount = vocab.size(); final int sequencesCount = seqindex.sequenceCount(); @@ -175,7 +179,6 @@ public class DTMAnalyzer extends Analyzer { log.info("topics: " + Constants.K_TOPICS); final boolean seqRelativeCutoff = Constants.MINIMUM_RELATIVE_PROB > 0; - final boolean seqPercentCutoff = Constants.PERCENT_PROB < 1; // for each topic for (int idxTopic = 0; idxTopic < Constants.K_TOPICS; idxTopic++) { @@ -186,6 +189,7 @@ public class DTMAnalyzer extends Analyzer { final TopicFull newTopic = new TopicFull(); final List<Sequence> newTopicSequences = new ArrayList<>(sequencesCount); newTopic.setSequences(newTopicSequences); + newTopic.setDynamic(true); newTopics.add(newTopic); in = new BufferedReader(new InputStreamReader(new FileInputStream(seqFile))); @@ -207,6 +211,9 @@ public class DTMAnalyzer extends Analyzer { // collect top words in each sequence for topic name final Set<TopicWord> topTopicWords = new HashSet<>(); + final double[] relevances = new double[sequencesCount]; + double relevanceSum = 0; + // for each sequence for (int idxSeq = 0, sequenceOffset = 0; idxSeq < sequencesCount; idxSeq++) { // calculate relative cutoff probability @@ -231,12 +238,6 @@ public class DTMAnalyzer extends Analyzer { if (!newSeqTopicWords.isEmpty()) { Collections.sort(newSeqTopicWords, Comparator.reverseOrder()); - // top n percent cutoff - if (seqPercentCutoff) { - final int fromIndex = (int) Math.round(newSeqTopicWords.size() * Constants.PERCENT_PROB); - newSeqTopicWords.subList(fromIndex, newSeqTopicWords.size()).clear(); - } - // collect top words topTopicWords.addAll(newSeqTopicWords.subList(0, Math.min(newSeqTopicWords.size(), Constants.TOPIC_AUTO_NAMING_WORDS))); @@ -261,16 +262,25 @@ public class DTMAnalyzer extends Analyzer { newTopicSequences.add(new Sequence(newSequenceFull.getId())); sequenceOffset += sequenceSize; + relevanceSum += relevance; + relevances[idxSeq] = relevance; } // sort topic words and generate topic name final List<TopicWord> topTopicWordsList = new ArrayList<>(topTopicWords); Collections.sort(topTopicWordsList); newTopic.setName(TopicFull.getNameFromWords(topTopicWordsList)); - } - log.info("creating " + newTopics.size() + " " + StringUtils.quantity(newTopics.size(), "topic")); - log.info("creating " + newWords.size() + " " + StringUtils.quantity(newWords.size(), "word")); + // calculate average + final double average = relevanceSum / sequencesCount; + newTopic.setAvgRelevance(average); + + // calculate variance + double variance = 0; + for (final double relevance : relevances) + variance += Math.pow(relevance - average, 2); + newTopic.setVarRelevance(variance / sequencesCount); + } // recreate topics and words dbSequences.drop(); @@ -318,7 +328,7 @@ public class DTMAnalyzer extends Analyzer { } } - } catch (IOException | InterruptedException e) { + } catch (final Exception e) { throw new AnalyzerException(e); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java index a9814c61ae0ac95fe3e1a6a71b69de0c5e7d5a5a..7f94afc67fafb02e4a145d00811826816fe196cd 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java @@ -90,16 +90,18 @@ public class JGibbAnalyzer extends Analyzer { } @Override - public void analyze() throws AnalyzerException { + public void analyze(final boolean reread) throws AnalyzerException { if (!modelFile.exists()) { throw new AnalyzerException("model file does not exist: " + modelFile.getAbsolutePath()); } - final Estimator estimator = new Estimator(); - estimator.init(options); - estimator.estimate(); + + if (!reread) { + final Estimator estimator = new Estimator(); + estimator.init(options); + estimator.estimate(); + } final boolean seqRelativeCutoff = Constants.MINIMUM_RELATIVE_PROB > 0; - final boolean seqPercentCutoff = Constants.PERCENT_PROB < 1; // read topic definitions and save @@ -159,12 +161,6 @@ public class JGibbAnalyzer extends Analyzer { if (!filteredTopicWords.isEmpty()) { Collections.sort(filteredTopicWords, Collections.reverseOrder()); - // top n percent cutoff - if (seqPercentCutoff) { - final int fromIndex = (int) Math.round(filteredTopicWords.size() * Constants.PERCENT_PROB); - filteredTopicWords.subList(fromIndex, filteredTopicWords.size()).clear(); - } - topic.setName(TopicFull.getNameFromWords(filteredTopicWords)); } } @@ -196,11 +192,14 @@ public class JGibbAnalyzer extends Analyzer { // extract topic ids and count them final CountMap<String> countMap = new CountMap<>(); final Matcher matcher = topicIndexPattern.matcher(line); - double totalCount = 0; - while (matcher.find()) { + while (matcher.find()) countMap.count(matcher.group(1)); - totalCount++; - } + + int maxCount = 0; + for (final Entry<String, Integer> entry : countMap.entrySet()) + if (entry.getValue() > maxCount) + maxCount = entry.getValue(); + final double minShare = maxCount * Constants.MINIMUM_RELATIVE_PROB; // create list of topics refs referencing topics with counted // occurrences, sum accepted topic word count @@ -208,7 +207,7 @@ public class JGibbAnalyzer extends Analyzer { final List<TopicRef> newTopicRefs = new ArrayList<>(countMap.size()); for (final Entry<String, Integer> entry : countMap.entrySet()) { // check if topic above threshold - if ((entry.getValue() / totalCount) >= Constants.TOPIC_THRESHOLD) { + if (entry.getValue() >= minShare) { reducedCount += entry.getValue(); final TopicFull topic = newTopics.get(Integer.parseInt(entry.getKey())); // TODO words with low relative likeliness are ignored. @@ -245,7 +244,7 @@ public class JGibbAnalyzer extends Analyzer { in.close(); - } catch (final IOException e) { + } catch (final Exception e) { throw new AnalyzerException(e); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java index 796fad9cbe29f7e46f93e5aa810ce9b354541c65..74ae029cba21ccd5be5b9553c24892265fe42c53 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java @@ -14,6 +14,13 @@ public class ModelingCommand implements Command { private Config config; private Analyzer analyzer; + private boolean reread; + + public ModelingCommand() {} + + public ModelingCommand(final boolean reread) { + this.reread = reread; + } @Override public void run() throws Exception { @@ -29,7 +36,7 @@ public class ModelingCommand implements Command { * do topic modeling */ log.info("topic modeling"); - analyzer.analyze(); + analyzer.analyze(reread); timer.lap("topic modeling"); /* diff --git a/vipra-ui/app/html/about.html b/vipra-ui/app/html/about.html index bad0047e78a1b784c4cc4d9f97fa3ef1cf415062..1a5b2502a86a3c7c237fa8f5061c9f5d38821216 100644 --- a/vipra-ui/app/html/about.html +++ b/vipra-ui/app/html/about.html @@ -148,30 +148,12 @@ </td> </tr> <tr> - <th>Likeliness precision</th> - <td ng-bind-template="{{::info.const.likeprecision}}"></td> + <th>Minimum relative probability</th> + <td ng-bind-template="{{::info.const.minrelprob}}"></td> </tr> <tr class="well"> <td colspan="2"> - The resulting likeliness precision of topic words. - </td> - </tr> - <tr> - <th>Minimum likeliness</th> - <td ng-bind-template="{{::info.const.minimumlike}}"></td> - </tr> - <tr class="well"> - <td colspan="2"> - The minimum likeliness of topic words. Words with a lesser likeliness are ignored. - </td> - </tr> - <tr> - <th>Topic share threshold</th> - <td ng-bind-template="{{::info.const.topicthresh}}"></td> - </tr> - <tr class="well"> - <td colspan="2"> - The minimum share value of a topic to be considered associated to an article. Topics with a lower share are ignored. + The minimum relative probability of topic words. Words are accepted into a topic, if their probability exceeds <it>maximum_probability * minimum_relative_probability</it>. </td> </tr> <tr> diff --git a/vipra-ui/app/html/index.html b/vipra-ui/app/html/index.html index 532e4c38d728fc1e22d7bc8e21ed31389860b024..c95c3c01c88287372cdfb965b85a984281772b44 100644 --- a/vipra-ui/app/html/index.html +++ b/vipra-ui/app/html/index.html @@ -10,7 +10,7 @@ </div> </div> <div class="row" ng-hide="search"> - <div class="col-md-6 text-center"> + <div class="col-md-8 text-center"> <h4>Latest articles</h4> <ul class="list-unstyled"> <li class="ellipsize" ng-repeat="article in latestArticles"> @@ -18,7 +18,7 @@ </li> </ul> </div> - <div class="col-md-3 text-center"> + <div class="col-md-4 text-center"> <h4>Latest topics</h4> <ul class="list-unstyled"> <li class="ellipsize" ng-repeat="topic in latestTopics"> @@ -26,14 +26,6 @@ </li> </ul> </div> - <div class="col-md-3 text-center"> - <h4>Latest words</h4> - <ul class="list-unstyled"> - <li class="ellipsize" ng-repeat="word in latestWords"> - <a ui-sref="words.show({id:word.id})" ng-bind="word.id"></a> - </li> - </ul> - </div> </div> <div class="row row-spaced"> <div class="col-md-12"> @@ -41,7 +33,7 @@ </div> </div> <div class="row row-spaced"> - <div class="text-center" ng-show="searching"> + <div class="col-md-12 text-center" ng-show="searching"> Searching... </div> <div class="col-md-12" ng-show="!searching && search && (!searchResults || searchResults.length == 0)"> diff --git a/vipra-ui/app/html/topics/show.html b/vipra-ui/app/html/topics/show.html index d0cdb176810f510bc0d25269cf41a624d70afe0f..b94a20787453764bba6fb9d2d78192d908d36558 100644 --- a/vipra-ui/app/html/topics/show.html +++ b/vipra-ui/app/html/topics/show.html @@ -54,29 +54,31 @@ </table> </div> </div> - <h3>Words <hide-link target="#words"/></h3> - <div class="row" id="words"> - <div class="col-md-12"> - <table class="table table-bordered table-condensed table-fixed"> - <thead> - <tr> - <th sort-by="id" sort-type="wordSort" sort-reverse="wordSortRev"> - Word - </th> - <th sort-by="likeliness" sort-type="wordSort" sort-reverse="wordSortRev"> - Likeliness - </th> - </tr> - </thead> - <tbody> - <tr ng-repeat="word in topic.words | orderBy:wordSort:wordSortRev"> - <td> - <a ui-sref="words.show({id:word.id})" ng-bind="word.id"></a> - </td> - <td ng-bind-template="{{word.likeliness.toFixed(6)}}"></td> - </tr> - </tbody> - </table> + <div ng-show="topic.dynamic"> + <h3>Words <hide-link target="#words"/></h3> + <div class="row" id="words"> + <div class="col-md-12"> + <table class="table table-bordered table-condensed table-fixed"> + <thead> + <tr> + <th sort-by="id" sort-type="wordSort" sort-reverse="wordSortRev"> + Word + </th> + <th sort-by="likeliness" sort-type="wordSort" sort-reverse="wordSortRev"> + Likeliness + </th> + </tr> + </thead> + <tbody> + <tr ng-repeat="word in topic.words | orderBy:wordSort:wordSortRev"> + <td> + <a ui-sref="words.show({id:word.id})" ng-bind="word.id"></a> + </td> + <td ng-bind-template="{{word.likeliness.toFixed(6)}}"></td> + </tr> + </tbody> + </table> + </div> </div> </div> </div> diff --git a/vipra-ui/app/html/words/index.html b/vipra-ui/app/html/words/index.html deleted file mode 100644 index 2875ec90232c6d7d26d213d44b899f18afac2ae4..0000000000000000000000000000000000000000 --- a/vipra-ui/app/html/words/index.html +++ /dev/null @@ -1,45 +0,0 @@ -<div ng-cloak ng-hide="$state.current.name !== 'words'"> - <div class="text-muted"> - Found - <ng-pluralize count="wordsTotal||0" when="{0:'no words',1:'1 word',other:'{} words'}"></ng-pluralize> in the database. - <span ng-show="wordsTotal"> - Sort by - <ol class="nya-bs-select nya-bs-condensed" ng-model="sort"> - <li value="id" class="nya-bs-option"><a>Word</a></li> - <li value="created" class="nya-bs-option"><a>Added</a></li> - </ol> - Direction - <ol class="nya-bs-select nya-bs-condensed" ng-model="order"> - <li value="+" class="nya-bs-option"><a>Ascending</a></li> - <li value="-" class="nya-bs-option"><a>Descending</a></li> - </ol> - </span> - <br> Page <span ng-bind="page||1"></span> of <span ng-bind="maxPage||1"></span>. - </div> - <pagination total="wordsTotal" page="page" limit="limit" change="changePage" /> - <div class="row"> - <div class="col-md-4"> - <ul class="list-unstyled"> - <li ng-repeat="word in words.slice(0,100)"> - <a ui-sref="words.show({id: word.id})">{{word.id}}</a> - </li> - </ul> - </div> - <div class="col-md-4"> - <ul class="list-unstyled"> - <li ng-repeat="word in words.slice(100,200)"> - <a ui-sref="words.show({id: word.id})">{{word.id}}</a> - </li> - </ul> - </div> - <div class="col-md-4"> - <ul class="list-unstyled"> - <li ng-repeat="word in words.slice(200,300)"> - <a ui-sref="words.show({id: word.id})">{{word.id}}</a> - </li> - </ul> - </div> - </div> - <pagination total="wordsTotal" page="page" limit="limit" /> -</div> -<div ng-cloak ui-view></div> diff --git a/vipra-ui/app/html/words/show.html b/vipra-ui/app/html/words/show.html deleted file mode 100644 index 11857549ded6b2930dcec8cd45104ec3a1da907d..0000000000000000000000000000000000000000 --- a/vipra-ui/app/html/words/show.html +++ /dev/null @@ -1,29 +0,0 @@ -<div ng-cloak ng-hide="$state.current.name !== 'words.show'"> - <div class="page-header"> - <h1 ng-bind="::word.id"></h1> - </div> - <h3>Info <hide-link target="#info"/></h3> - <div class="row" id="info"> - <div class="col-md-12"> - <table class="table table-bordered table-condensed table-fixed table-infos"> - <tbody> - <tr> - <th>Created</th> - <td ng-bind="::wordCreated"></td> - </tr> - </tbody> - </table> - </div> - </div> - <h3>Topics <hide-link target="#topics"/></h3> - <div class="row" id="topics"> - <div class="col-md-12"> - <ol> - <li ng-repeat="topic in ::topics"> - <topic-link topic="topic" /> - </li> - </ol> - </div> - </div> -</div> -<div ng-cloak ui-view></div> diff --git a/vipra-ui/app/index.html b/vipra-ui/app/index.html index cbef9640a590700e2af0eb00dea3d17b75c5d3d4..666f2d800f038fddf0dcca03b9398098f633b6c8 100644 --- a/vipra-ui/app/index.html +++ b/vipra-ui/app/index.html @@ -62,9 +62,6 @@ <li ng-class="{active:$state.includes('topics')}"> <a ui-sref="topics">Topics</a> </li> - <li ng-class="{active:$state.includes('words')}"> - <a ui-sref="words">Words</a> - </li> </ul> <ul class="nav navbar-nav navbar-right"> <li ng-class="{active:$state.includes('about')}"> diff --git a/vipra-ui/app/js/app.js b/vipra-ui/app/js/app.js index 61d949d5b46ff5d21d6a420daafd449bc6778a57..df5074773c5f33a30b3af8e4ab68e861217460c7 100644 --- a/vipra-ui/app/js/app.js +++ b/vipra-ui/app/js/app.js @@ -103,26 +103,6 @@ } }); - // states: words - - $stateProvider.state('words', { - url: '/words', - templateUrl: 'html/words/index.html', - controller: 'WordsIndexController', - ncyBreadcrumb: { - label: 'Words' - } - }); - - $stateProvider.state('words.show', { - url: '/:id', - templateUrl: 'html/words/show.html', - controller: 'WordsShowController', - ncyBreadcrumb: { - label: '{{word.id}}' - } - }); - }]); var loadingTimeout; diff --git a/vipra-ui/app/js/controllers.js b/vipra-ui/app/js/controllers.js index 66cfa1d973d10c20211410859ba4c94dbc98daae..b74f779177f7b809f65032696e5a632a5d2c79f1 100644 --- a/vipra-ui/app/js/controllers.js +++ b/vipra-ui/app/js/controllers.js @@ -42,15 +42,6 @@ $scope.errors = err; }); - WordFactory.query({ - limit: Vipra.config.latestItems, - sort: '-created' - }, function(data) { - $scope.latestWords = data; - }, function(err) { - $scope.errors = err; - }); - $scope.$watch('search', function() { if ($scope.search) { $location.search('query', $scope.search); @@ -494,64 +485,6 @@ } ]); - /**************************************************************************** - * Word Controllers - ****************************************************************************/ - - /** - * Word Index route - */ - app.controller('WordsIndexController', ['$scope', '$state', '$location', 'Store', 'WordFactory', - function($scope, $state, $location, Store, WordFactory) { - - $scope.page = Math.max($location.search().page || 1, 1); - $scope.limit = 300; - $scope.sort = Store('sortwords') || 'id'; - $scope.order = Store('orderwords') || '+'; - - $scope.$watchGroup(['page', 'sort', 'order'], function() { - WordFactory.query({ - skip: ($scope.page - 1) * $scope.limit, - limit: $scope.limit, - sort: $scope.order + $scope.sort - }, function(data, headers) { - $scope.words = data; - $scope.wordsTotal = headers("V-Total"); - $scope.maxPage = Math.ceil($scope.wordsTotal / $scope.limit); - }, function(err) { - $scope.errors = err; - }); - }); - - } - ]); - - /** - * Word Show route - */ - app.controller('WordsShowController', ['$scope', '$stateParams', 'WordFactory', - function($scope, $stateParams, WordFactory) { - - WordFactory.get({ - id: $stateParams.id - }, function(data) { - $scope.word = data; - $scope.wordCreated = Vipra.formatDateTime($scope.word.created); - }, function(err) { - $scope.errors = err; - }); - - WordFactory.topics({ - id: $stateParams.id - }, function(data) { - $scope.topics = data; - }, function(err) { - $scope.errors = err; - }); - - } - ]); - /**************************************************************************** * Directive Controllers ****************************************************************************/ diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index a331195d0f2632e3bcebdcfbbeb055ab642dad1f..9dd6e9a6dc73180acea15bcb26dad66dd24f36c0 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -45,12 +45,12 @@ public class Constants { public static final int ES_PORT = 9300; /** - * Topic boost parameter. Boosts topic importance in queries. + * Topic boost parameter. Boosts topic importance in queries. Default 4. */ public static final int ES_BOOST_TOPICS = 4; /** - * Title boost parameter. Boosts title importance in queries. + * Title boost parameter. Boosts title importance in queries. Default 2. */ public static final int ES_BOOST_TITLES = 2; @@ -61,61 +61,53 @@ public class Constants { /** * The number of words to be used to generate a topic name. The top n words * (sorted by likeliness) are used to generate a name for unnamed topics. + * Default 4. */ public static final int TOPIC_AUTO_NAMING_WORDS = 4; /** * Number of topics to discover with topic modeling, if the selected topic - * modeling library supports this parameter. + * modeling library supports this parameter. Default 20. */ - public static final int K_TOPICS = 25; + public static final int K_TOPICS = 20; /** * Number of words in a discovered topic, if the selected topic modeling - * library supports this parameter. + * library supports this parameter. Default 50. */ public static final int K_TOPIC_WORDS = 50; /** - * Minimum likeliness of words. Words with lower likeliness are ignored + * Minimum likeliness of words. Words with lower likeliness are ignored. + * Default 0.01. */ public static final double MINIMUM_RELATIVE_PROB = 0.01; /** - * - */ - public static final double PERCENT_PROB = 0.9; - - /** - * Topics with a share greater or equal to this number are regarded as - * accepted topics to that article. Value range: [0.0, 1.0] - */ - public static final double TOPIC_THRESHOLD = 0.01; - - /** - * Dynamic minimum iterations. Used for dynamic topic modeling. + * Dynamic minimum iterations. Used for dynamic topic modeling. Default 100. */ public static final int DYNAMIC_MIN_ITER = 100; /** - * Dynamic maximum iterations. Used for dynamic topic modeling. + * Dynamic maximum iterations. Used for dynamic topic modeling. Default + * 1000. */ - public static final int DYNAMIC_MAX_ITER = 500; + public static final int DYNAMIC_MAX_ITER = 1000; /** - * Static iterations. Used for static topic modeling. + * Static iterations. Used for static topic modeling. Default 100. */ - public static final int STATIC_ITER = 100; + public static final int STATIC_ITER = 200; /** * Minimum word frequency for words to be used for topic modeling. All words * below this frequency in a document are filtered out before generating the - * topic model. + * topic model. Default 10. */ public static final int DOCUMENT_MIN_WORD_FREQ = 10; /** - * Minumum number of words per document. + * Minumum number of words per document. Default 10. */ public static final int DOCUMENT_MIN_LENGTH = 10; diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java index 6a125144c5a1f0cd8265fbcfeba3baea051e7252..07f6de3709e241c42b0106bcb07a743ea6defcd8 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java @@ -36,6 +36,12 @@ public class TopicFull implements Model<ObjectId>, Serializable { @QueryIgnore(multi = true) private List<Sequence> sequences; + private Double avgRelevance; + + private Double varRelevance; + + private boolean dynamic = false; + private Date created; private Date modified; @@ -86,6 +92,30 @@ public class TopicFull implements Model<ObjectId>, Serializable { this.sequences = sequences; } + public Double getAvgRelevance() { + return avgRelevance; + } + + public void setAvgRelevance(Double avgRelevance) { + this.avgRelevance = avgRelevance; + } + + public Double getVarRelevance() { + return varRelevance; + } + + public void setVarRelevance(Double varRelevance) { + this.varRelevance = varRelevance; + } + + public boolean isDynamic() { + return dynamic; + } + + public void setDynamic(boolean dynamic) { + this.dynamic = dynamic; + } + public Date getCreated() { return created; } @@ -144,7 +174,8 @@ public class TopicFull implements Model<ObjectId>, Serializable { @Override public String toString() { return "TopicFull [id=" + id + ", name=" + name + ", index=" + index + ", words=" + words + ", sequences=" - + sequences + ", created=" + created + ", modified=" + modified + "]"; + + sequences + ", avgRelevance=" + avgRelevance + ", varRelevance=" + varRelevance + ", created=" + + created + ", modified=" + modified + "]"; } }