From 259067f881d3d10ed0c0dda9f529569aae3fe93b Mon Sep 17 00:00:00 2001 From: Eike Cochu <eike@cochu.com> Date: Mon, 7 Mar 2016 20:40:53 +0100 Subject: [PATCH] added reread command reread command to reread model files without modeling removed word pages, controllers, etc. added dynamic attribute to topic removed unused constants removed percent cutoff constant updated about page fixed frontpage searching text misalignment added avgrelevance and varrelevance to topic --- .../de/vipra/rest/resource/InfoResource.java | 3 +- vipra-cmd/runcfg/CMD - Reread.launch | 18 +++++ .../main/java/de/vipra/cmd/CmdOptions.java | 5 ++ .../src/main/java/de/vipra/cmd/Main.java | 5 +- .../main/java/de/vipra/cmd/lda/Analyzer.java | 2 +- .../java/de/vipra/cmd/lda/DTMAnalyzer.java | 64 ++++++++++-------- .../java/de/vipra/cmd/lda/JGibbAnalyzer.java | 33 +++++---- .../de/vipra/cmd/option/ModelingCommand.java | 9 ++- vipra-ui/app/html/about.html | 24 +------ vipra-ui/app/html/index.html | 14 +--- vipra-ui/app/html/topics/show.html | 48 ++++++------- vipra-ui/app/html/words/index.html | 45 ------------- vipra-ui/app/html/words/show.html | 29 -------- vipra-ui/app/index.html | 3 - vipra-ui/app/js/app.js | 20 ------ vipra-ui/app/js/controllers.js | 67 ------------------- .../main/java/de/vipra/util/Constants.java | 40 +++++------ .../java/de/vipra/util/model/TopicFull.java | 33 ++++++++- 18 files changed, 168 insertions(+), 294 deletions(-) create mode 100644 vipra-cmd/runcfg/CMD - Reread.launch delete mode 100644 vipra-ui/app/html/words/index.html delete mode 100644 vipra-ui/app/html/words/show.html diff --git a/vipra-backend/src/main/java/de/vipra/rest/resource/InfoResource.java b/vipra-backend/src/main/java/de/vipra/rest/resource/InfoResource.java index c999d85f..a9f0de4f 100644 --- a/vipra-backend/src/main/java/de/vipra/rest/resource/InfoResource.java +++ b/vipra-backend/src/main/java/de/vipra/rest/resource/InfoResource.java @@ -79,8 +79,7 @@ public class InfoResource { info.put("const.topicautoname", Constants.TOPIC_AUTO_NAMING_WORDS); info.put("const.ktopics", Constants.K_TOPICS); info.put("const.ktopicwords", Constants.K_TOPIC_WORDS); - info.put("const.minimumlike", Constants.MINIMUM_RELATIVE_PROB); - info.put("const.topicthresh", Constants.TOPIC_THRESHOLD); + info.put("const.minrelprob", Constants.MINIMUM_RELATIVE_PROB); info.put("const.dynminiter", Constants.DYNAMIC_MIN_ITER); info.put("const.dynmaxiter", Constants.DYNAMIC_MAX_ITER); info.put("const.statiter", Constants.STATIC_ITER); diff --git a/vipra-cmd/runcfg/CMD - Reread.launch b/vipra-cmd/runcfg/CMD - Reread.launch new file mode 100644 index 00000000..3d3851ee --- /dev/null +++ b/vipra-cmd/runcfg/CMD - Reread.launch @@ -0,0 +1,18 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication"> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS"> +<listEntry value="/vipra-cmd/src/main/java/de/vipra/cmd/Main.java"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES"> +<listEntry value="1"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.ui.favoriteGroups"> +<listEntry value="org.eclipse.debug.ui.launchGroup.run"/> +</listAttribute> +<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-r"/> +<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> +<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Dlog4j.configurationFile=log4j2dev.xml"/> +</launchConfiguration> diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java b/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java index a7c34652..7498b4a2 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/CmdOptions.java @@ -44,6 +44,9 @@ public class CmdOptions extends Options { public static final String OPT_INDEXING = "e"; public static final String OPT_INDEXING_LONG = "indexing"; + public static final String OPT_REREAD = "r"; + public static final String OPT_REREAD_LONG = "reread"; + public CmdOptions() { addOption(Option.builder(OPT_HELP).longOpt(OPT_HELP_LONG).desc("print this message").build()); addOption(Option.builder(OPT_SHELL).longOpt(OPT_SHELL_LONG).hasArg(true).argName("name") @@ -60,6 +63,8 @@ public class CmdOptions extends Options { addOption(Option.builder(OPT_CONFIG).longOpt(OPT_CONFIG_LONG).desc("show configuration").build()); addOption(Option.builder(OPT_MODELING).longOpt(OPT_MODELING_LONG).desc("regenerate topic model").build()); addOption(Option.builder(OPT_INDEXING).longOpt(OPT_INDEXING_LONG).desc("regenerate search index").build()); + addOption(Option.builder(OPT_REREAD).longOpt(OPT_REREAD_LONG) + .desc("reread model files, ignored when remodeling").build()); } public void printHelp(final String cmd) { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java index c441664c..e50d9f9b 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java @@ -8,6 +8,7 @@ import static de.vipra.cmd.CmdOptions.OPT_HELP; import static de.vipra.cmd.CmdOptions.OPT_IMPORT; import static de.vipra.cmd.CmdOptions.OPT_INDEXING; import static de.vipra.cmd.CmdOptions.OPT_MODELING; +import static de.vipra.cmd.CmdOptions.OPT_REREAD; import static de.vipra.cmd.CmdOptions.OPT_SHELL; import static de.vipra.cmd.CmdOptions.OPT_SILENT; import static de.vipra.cmd.CmdOptions.OPT_STATS; @@ -111,8 +112,8 @@ public class Main { if (cline.hasOption(OPT_IMPORT)) commands.add(new ImportCommand(cline.getOptionValues(OPT_IMPORT))); - if (cline.hasOption(OPT_MODELING)) - commands.add(new ModelingCommand()); + if (cline.hasOption(OPT_MODELING) || cline.hasOption(OPT_REREAD)) + commands.add(new ModelingCommand(!cline.hasOption(OPT_MODELING) && cline.hasOption(OPT_REREAD))); if (cline.hasOption(OPT_INDEXING)) commands.add(new IndexingCommand()); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java index 8f9ce5bd..88a31bcf 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/Analyzer.java @@ -18,7 +18,7 @@ public abstract class Analyzer { public abstract void init(Config config) throws AnalyzerException; - public abstract void analyze() throws AnalyzerException; + public abstract void analyze(boolean reread) throws AnalyzerException; public static Analyzer getAnalyzer(final Config config) throws AnalyzerException { Analyzer analyzer = null; diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java index 925b61fa..88eb488d 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java @@ -116,25 +116,29 @@ public class DTMAnalyzer extends Analyzer { } @Override - public void analyze() throws AnalyzerException { + public void analyze(final boolean reread) throws AnalyzerException { try { - final Process p = Runtime.getRuntime().exec(command, null); - if (!p.isAlive()) - throw new AnalyzerException("dtm process is dead"); - - // read from process output - BufferedReader in = new BufferedReader(new InputStreamReader(p.getErrorStream())); - - String line; - int iteration = 0; - while ((line = in.readLine()) != null) { - if (line.contains("EM iter")) { - log.info("iteration " + iteration++); + BufferedReader in; + + if (!reread) { + final Process p = Runtime.getRuntime().exec(command, null); + if (!p.isAlive()) + throw new AnalyzerException("dtm process is dead"); + + // read from process output + in = new BufferedReader(new InputStreamReader(p.getErrorStream())); + + String line; + int iteration = 0; + while ((line = in.readLine()) != null) { + if (line.contains("EM iter")) { + log.info("iteration " + iteration++); + } } - } - in.close(); - p.waitFor(); + in.close(); + p.waitFor(); + } final int wordCount = vocab.size(); final int sequencesCount = seqindex.sequenceCount(); @@ -175,7 +179,6 @@ public class DTMAnalyzer extends Analyzer { log.info("topics: " + Constants.K_TOPICS); final boolean seqRelativeCutoff = Constants.MINIMUM_RELATIVE_PROB > 0; - final boolean seqPercentCutoff = Constants.PERCENT_PROB < 1; // for each topic for (int idxTopic = 0; idxTopic < Constants.K_TOPICS; idxTopic++) { @@ -186,6 +189,7 @@ public class DTMAnalyzer extends Analyzer { final TopicFull newTopic = new TopicFull(); final List<Sequence> newTopicSequences = new ArrayList<>(sequencesCount); newTopic.setSequences(newTopicSequences); + newTopic.setDynamic(true); newTopics.add(newTopic); in = new BufferedReader(new InputStreamReader(new FileInputStream(seqFile))); @@ -207,6 +211,9 @@ public class DTMAnalyzer extends Analyzer { // collect top words in each sequence for topic name final Set<TopicWord> topTopicWords = new HashSet<>(); + final double[] relevances = new double[sequencesCount]; + double relevanceSum = 0; + // for each sequence for (int idxSeq = 0, sequenceOffset = 0; idxSeq < sequencesCount; idxSeq++) { // calculate relative cutoff probability @@ -231,12 +238,6 @@ public class DTMAnalyzer extends Analyzer { if (!newSeqTopicWords.isEmpty()) { Collections.sort(newSeqTopicWords, Comparator.reverseOrder()); - // top n percent cutoff - if (seqPercentCutoff) { - final int fromIndex = (int) Math.round(newSeqTopicWords.size() * Constants.PERCENT_PROB); - newSeqTopicWords.subList(fromIndex, newSeqTopicWords.size()).clear(); - } - // collect top words topTopicWords.addAll(newSeqTopicWords.subList(0, Math.min(newSeqTopicWords.size(), Constants.TOPIC_AUTO_NAMING_WORDS))); @@ -261,16 +262,25 @@ public class DTMAnalyzer extends Analyzer { newTopicSequences.add(new Sequence(newSequenceFull.getId())); sequenceOffset += sequenceSize; + relevanceSum += relevance; + relevances[idxSeq] = relevance; } // sort topic words and generate topic name final List<TopicWord> topTopicWordsList = new ArrayList<>(topTopicWords); Collections.sort(topTopicWordsList); newTopic.setName(TopicFull.getNameFromWords(topTopicWordsList)); - } - log.info("creating " + newTopics.size() + " " + StringUtils.quantity(newTopics.size(), "topic")); - log.info("creating " + newWords.size() + " " + StringUtils.quantity(newWords.size(), "word")); + // calculate average + final double average = relevanceSum / sequencesCount; + newTopic.setAvgRelevance(average); + + // calculate variance + double variance = 0; + for (final double relevance : relevances) + variance += Math.pow(relevance - average, 2); + newTopic.setVarRelevance(variance / sequencesCount); + } // recreate topics and words dbSequences.drop(); @@ -318,7 +328,7 @@ public class DTMAnalyzer extends Analyzer { } } - } catch (IOException | InterruptedException e) { + } catch (final Exception e) { throw new AnalyzerException(e); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java index a9814c61..7f94afc6 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java @@ -90,16 +90,18 @@ public class JGibbAnalyzer extends Analyzer { } @Override - public void analyze() throws AnalyzerException { + public void analyze(final boolean reread) throws AnalyzerException { if (!modelFile.exists()) { throw new AnalyzerException("model file does not exist: " + modelFile.getAbsolutePath()); } - final Estimator estimator = new Estimator(); - estimator.init(options); - estimator.estimate(); + + if (!reread) { + final Estimator estimator = new Estimator(); + estimator.init(options); + estimator.estimate(); + } final boolean seqRelativeCutoff = Constants.MINIMUM_RELATIVE_PROB > 0; - final boolean seqPercentCutoff = Constants.PERCENT_PROB < 1; // read topic definitions and save @@ -159,12 +161,6 @@ public class JGibbAnalyzer extends Analyzer { if (!filteredTopicWords.isEmpty()) { Collections.sort(filteredTopicWords, Collections.reverseOrder()); - // top n percent cutoff - if (seqPercentCutoff) { - final int fromIndex = (int) Math.round(filteredTopicWords.size() * Constants.PERCENT_PROB); - filteredTopicWords.subList(fromIndex, filteredTopicWords.size()).clear(); - } - topic.setName(TopicFull.getNameFromWords(filteredTopicWords)); } } @@ -196,11 +192,14 @@ public class JGibbAnalyzer extends Analyzer { // extract topic ids and count them final CountMap<String> countMap = new CountMap<>(); final Matcher matcher = topicIndexPattern.matcher(line); - double totalCount = 0; - while (matcher.find()) { + while (matcher.find()) countMap.count(matcher.group(1)); - totalCount++; - } + + int maxCount = 0; + for (final Entry<String, Integer> entry : countMap.entrySet()) + if (entry.getValue() > maxCount) + maxCount = entry.getValue(); + final double minShare = maxCount * Constants.MINIMUM_RELATIVE_PROB; // create list of topics refs referencing topics with counted // occurrences, sum accepted topic word count @@ -208,7 +207,7 @@ public class JGibbAnalyzer extends Analyzer { final List<TopicRef> newTopicRefs = new ArrayList<>(countMap.size()); for (final Entry<String, Integer> entry : countMap.entrySet()) { // check if topic above threshold - if ((entry.getValue() / totalCount) >= Constants.TOPIC_THRESHOLD) { + if (entry.getValue() >= minShare) { reducedCount += entry.getValue(); final TopicFull topic = newTopics.get(Integer.parseInt(entry.getKey())); // TODO words with low relative likeliness are ignored. @@ -245,7 +244,7 @@ public class JGibbAnalyzer extends Analyzer { in.close(); - } catch (final IOException e) { + } catch (final Exception e) { throw new AnalyzerException(e); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java index 796fad9c..74ae029c 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java @@ -14,6 +14,13 @@ public class ModelingCommand implements Command { private Config config; private Analyzer analyzer; + private boolean reread; + + public ModelingCommand() {} + + public ModelingCommand(final boolean reread) { + this.reread = reread; + } @Override public void run() throws Exception { @@ -29,7 +36,7 @@ public class ModelingCommand implements Command { * do topic modeling */ log.info("topic modeling"); - analyzer.analyze(); + analyzer.analyze(reread); timer.lap("topic modeling"); /* diff --git a/vipra-ui/app/html/about.html b/vipra-ui/app/html/about.html index bad0047e..1a5b2502 100644 --- a/vipra-ui/app/html/about.html +++ b/vipra-ui/app/html/about.html @@ -148,30 +148,12 @@ </td> </tr> <tr> - <th>Likeliness precision</th> - <td ng-bind-template="{{::info.const.likeprecision}}"></td> + <th>Minimum relative probability</th> + <td ng-bind-template="{{::info.const.minrelprob}}"></td> </tr> <tr class="well"> <td colspan="2"> - The resulting likeliness precision of topic words. - </td> - </tr> - <tr> - <th>Minimum likeliness</th> - <td ng-bind-template="{{::info.const.minimumlike}}"></td> - </tr> - <tr class="well"> - <td colspan="2"> - The minimum likeliness of topic words. Words with a lesser likeliness are ignored. - </td> - </tr> - <tr> - <th>Topic share threshold</th> - <td ng-bind-template="{{::info.const.topicthresh}}"></td> - </tr> - <tr class="well"> - <td colspan="2"> - The minimum share value of a topic to be considered associated to an article. Topics with a lower share are ignored. + The minimum relative probability of topic words. Words are accepted into a topic, if their probability exceeds <it>maximum_probability * minimum_relative_probability</it>. </td> </tr> <tr> diff --git a/vipra-ui/app/html/index.html b/vipra-ui/app/html/index.html index 532e4c38..c95c3c01 100644 --- a/vipra-ui/app/html/index.html +++ b/vipra-ui/app/html/index.html @@ -10,7 +10,7 @@ </div> </div> <div class="row" ng-hide="search"> - <div class="col-md-6 text-center"> + <div class="col-md-8 text-center"> <h4>Latest articles</h4> <ul class="list-unstyled"> <li class="ellipsize" ng-repeat="article in latestArticles"> @@ -18,7 +18,7 @@ </li> </ul> </div> - <div class="col-md-3 text-center"> + <div class="col-md-4 text-center"> <h4>Latest topics</h4> <ul class="list-unstyled"> <li class="ellipsize" ng-repeat="topic in latestTopics"> @@ -26,14 +26,6 @@ </li> </ul> </div> - <div class="col-md-3 text-center"> - <h4>Latest words</h4> - <ul class="list-unstyled"> - <li class="ellipsize" ng-repeat="word in latestWords"> - <a ui-sref="words.show({id:word.id})" ng-bind="word.id"></a> - </li> - </ul> - </div> </div> <div class="row row-spaced"> <div class="col-md-12"> @@ -41,7 +33,7 @@ </div> </div> <div class="row row-spaced"> - <div class="text-center" ng-show="searching"> + <div class="col-md-12 text-center" ng-show="searching"> Searching... </div> <div class="col-md-12" ng-show="!searching && search && (!searchResults || searchResults.length == 0)"> diff --git a/vipra-ui/app/html/topics/show.html b/vipra-ui/app/html/topics/show.html index d0cdb176..b94a2078 100644 --- a/vipra-ui/app/html/topics/show.html +++ b/vipra-ui/app/html/topics/show.html @@ -54,29 +54,31 @@ </table> </div> </div> - <h3>Words <hide-link target="#words"/></h3> - <div class="row" id="words"> - <div class="col-md-12"> - <table class="table table-bordered table-condensed table-fixed"> - <thead> - <tr> - <th sort-by="id" sort-type="wordSort" sort-reverse="wordSortRev"> - Word - </th> - <th sort-by="likeliness" sort-type="wordSort" sort-reverse="wordSortRev"> - Likeliness - </th> - </tr> - </thead> - <tbody> - <tr ng-repeat="word in topic.words | orderBy:wordSort:wordSortRev"> - <td> - <a ui-sref="words.show({id:word.id})" ng-bind="word.id"></a> - </td> - <td ng-bind-template="{{word.likeliness.toFixed(6)}}"></td> - </tr> - </tbody> - </table> + <div ng-show="topic.dynamic"> + <h3>Words <hide-link target="#words"/></h3> + <div class="row" id="words"> + <div class="col-md-12"> + <table class="table table-bordered table-condensed table-fixed"> + <thead> + <tr> + <th sort-by="id" sort-type="wordSort" sort-reverse="wordSortRev"> + Word + </th> + <th sort-by="likeliness" sort-type="wordSort" sort-reverse="wordSortRev"> + Likeliness + </th> + </tr> + </thead> + <tbody> + <tr ng-repeat="word in topic.words | orderBy:wordSort:wordSortRev"> + <td> + <a ui-sref="words.show({id:word.id})" ng-bind="word.id"></a> + </td> + <td ng-bind-template="{{word.likeliness.toFixed(6)}}"></td> + </tr> + </tbody> + </table> + </div> </div> </div> </div> diff --git a/vipra-ui/app/html/words/index.html b/vipra-ui/app/html/words/index.html deleted file mode 100644 index 2875ec90..00000000 --- a/vipra-ui/app/html/words/index.html +++ /dev/null @@ -1,45 +0,0 @@ -<div ng-cloak ng-hide="$state.current.name !== 'words'"> - <div class="text-muted"> - Found - <ng-pluralize count="wordsTotal||0" when="{0:'no words',1:'1 word',other:'{} words'}"></ng-pluralize> in the database. - <span ng-show="wordsTotal"> - Sort by - <ol class="nya-bs-select nya-bs-condensed" ng-model="sort"> - <li value="id" class="nya-bs-option"><a>Word</a></li> - <li value="created" class="nya-bs-option"><a>Added</a></li> - </ol> - Direction - <ol class="nya-bs-select nya-bs-condensed" ng-model="order"> - <li value="+" class="nya-bs-option"><a>Ascending</a></li> - <li value="-" class="nya-bs-option"><a>Descending</a></li> - </ol> - </span> - <br> Page <span ng-bind="page||1"></span> of <span ng-bind="maxPage||1"></span>. - </div> - <pagination total="wordsTotal" page="page" limit="limit" change="changePage" /> - <div class="row"> - <div class="col-md-4"> - <ul class="list-unstyled"> - <li ng-repeat="word in words.slice(0,100)"> - <a ui-sref="words.show({id: word.id})">{{word.id}}</a> - </li> - </ul> - </div> - <div class="col-md-4"> - <ul class="list-unstyled"> - <li ng-repeat="word in words.slice(100,200)"> - <a ui-sref="words.show({id: word.id})">{{word.id}}</a> - </li> - </ul> - </div> - <div class="col-md-4"> - <ul class="list-unstyled"> - <li ng-repeat="word in words.slice(200,300)"> - <a ui-sref="words.show({id: word.id})">{{word.id}}</a> - </li> - </ul> - </div> - </div> - <pagination total="wordsTotal" page="page" limit="limit" /> -</div> -<div ng-cloak ui-view></div> diff --git a/vipra-ui/app/html/words/show.html b/vipra-ui/app/html/words/show.html deleted file mode 100644 index 11857549..00000000 --- a/vipra-ui/app/html/words/show.html +++ /dev/null @@ -1,29 +0,0 @@ -<div ng-cloak ng-hide="$state.current.name !== 'words.show'"> - <div class="page-header"> - <h1 ng-bind="::word.id"></h1> - </div> - <h3>Info <hide-link target="#info"/></h3> - <div class="row" id="info"> - <div class="col-md-12"> - <table class="table table-bordered table-condensed table-fixed table-infos"> - <tbody> - <tr> - <th>Created</th> - <td ng-bind="::wordCreated"></td> - </tr> - </tbody> - </table> - </div> - </div> - <h3>Topics <hide-link target="#topics"/></h3> - <div class="row" id="topics"> - <div class="col-md-12"> - <ol> - <li ng-repeat="topic in ::topics"> - <topic-link topic="topic" /> - </li> - </ol> - </div> - </div> -</div> -<div ng-cloak ui-view></div> diff --git a/vipra-ui/app/index.html b/vipra-ui/app/index.html index cbef9640..666f2d80 100644 --- a/vipra-ui/app/index.html +++ b/vipra-ui/app/index.html @@ -62,9 +62,6 @@ <li ng-class="{active:$state.includes('topics')}"> <a ui-sref="topics">Topics</a> </li> - <li ng-class="{active:$state.includes('words')}"> - <a ui-sref="words">Words</a> - </li> </ul> <ul class="nav navbar-nav navbar-right"> <li ng-class="{active:$state.includes('about')}"> diff --git a/vipra-ui/app/js/app.js b/vipra-ui/app/js/app.js index 61d949d5..df507477 100644 --- a/vipra-ui/app/js/app.js +++ b/vipra-ui/app/js/app.js @@ -103,26 +103,6 @@ } }); - // states: words - - $stateProvider.state('words', { - url: '/words', - templateUrl: 'html/words/index.html', - controller: 'WordsIndexController', - ncyBreadcrumb: { - label: 'Words' - } - }); - - $stateProvider.state('words.show', { - url: '/:id', - templateUrl: 'html/words/show.html', - controller: 'WordsShowController', - ncyBreadcrumb: { - label: '{{word.id}}' - } - }); - }]); var loadingTimeout; diff --git a/vipra-ui/app/js/controllers.js b/vipra-ui/app/js/controllers.js index 66cfa1d9..b74f7791 100644 --- a/vipra-ui/app/js/controllers.js +++ b/vipra-ui/app/js/controllers.js @@ -42,15 +42,6 @@ $scope.errors = err; }); - WordFactory.query({ - limit: Vipra.config.latestItems, - sort: '-created' - }, function(data) { - $scope.latestWords = data; - }, function(err) { - $scope.errors = err; - }); - $scope.$watch('search', function() { if ($scope.search) { $location.search('query', $scope.search); @@ -494,64 +485,6 @@ } ]); - /**************************************************************************** - * Word Controllers - ****************************************************************************/ - - /** - * Word Index route - */ - app.controller('WordsIndexController', ['$scope', '$state', '$location', 'Store', 'WordFactory', - function($scope, $state, $location, Store, WordFactory) { - - $scope.page = Math.max($location.search().page || 1, 1); - $scope.limit = 300; - $scope.sort = Store('sortwords') || 'id'; - $scope.order = Store('orderwords') || '+'; - - $scope.$watchGroup(['page', 'sort', 'order'], function() { - WordFactory.query({ - skip: ($scope.page - 1) * $scope.limit, - limit: $scope.limit, - sort: $scope.order + $scope.sort - }, function(data, headers) { - $scope.words = data; - $scope.wordsTotal = headers("V-Total"); - $scope.maxPage = Math.ceil($scope.wordsTotal / $scope.limit); - }, function(err) { - $scope.errors = err; - }); - }); - - } - ]); - - /** - * Word Show route - */ - app.controller('WordsShowController', ['$scope', '$stateParams', 'WordFactory', - function($scope, $stateParams, WordFactory) { - - WordFactory.get({ - id: $stateParams.id - }, function(data) { - $scope.word = data; - $scope.wordCreated = Vipra.formatDateTime($scope.word.created); - }, function(err) { - $scope.errors = err; - }); - - WordFactory.topics({ - id: $stateParams.id - }, function(data) { - $scope.topics = data; - }, function(err) { - $scope.errors = err; - }); - - } - ]); - /**************************************************************************** * Directive Controllers ****************************************************************************/ diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index a331195d..9dd6e9a6 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -45,12 +45,12 @@ public class Constants { public static final int ES_PORT = 9300; /** - * Topic boost parameter. Boosts topic importance in queries. + * Topic boost parameter. Boosts topic importance in queries. Default 4. */ public static final int ES_BOOST_TOPICS = 4; /** - * Title boost parameter. Boosts title importance in queries. + * Title boost parameter. Boosts title importance in queries. Default 2. */ public static final int ES_BOOST_TITLES = 2; @@ -61,61 +61,53 @@ public class Constants { /** * The number of words to be used to generate a topic name. The top n words * (sorted by likeliness) are used to generate a name for unnamed topics. + * Default 4. */ public static final int TOPIC_AUTO_NAMING_WORDS = 4; /** * Number of topics to discover with topic modeling, if the selected topic - * modeling library supports this parameter. + * modeling library supports this parameter. Default 20. */ - public static final int K_TOPICS = 25; + public static final int K_TOPICS = 20; /** * Number of words in a discovered topic, if the selected topic modeling - * library supports this parameter. + * library supports this parameter. Default 50. */ public static final int K_TOPIC_WORDS = 50; /** - * Minimum likeliness of words. Words with lower likeliness are ignored + * Minimum likeliness of words. Words with lower likeliness are ignored. + * Default 0.01. */ public static final double MINIMUM_RELATIVE_PROB = 0.01; /** - * - */ - public static final double PERCENT_PROB = 0.9; - - /** - * Topics with a share greater or equal to this number are regarded as - * accepted topics to that article. Value range: [0.0, 1.0] - */ - public static final double TOPIC_THRESHOLD = 0.01; - - /** - * Dynamic minimum iterations. Used for dynamic topic modeling. + * Dynamic minimum iterations. Used for dynamic topic modeling. Default 100. */ public static final int DYNAMIC_MIN_ITER = 100; /** - * Dynamic maximum iterations. Used for dynamic topic modeling. + * Dynamic maximum iterations. Used for dynamic topic modeling. Default + * 1000. */ - public static final int DYNAMIC_MAX_ITER = 500; + public static final int DYNAMIC_MAX_ITER = 1000; /** - * Static iterations. Used for static topic modeling. + * Static iterations. Used for static topic modeling. Default 100. */ - public static final int STATIC_ITER = 100; + public static final int STATIC_ITER = 200; /** * Minimum word frequency for words to be used for topic modeling. All words * below this frequency in a document are filtered out before generating the - * topic model. + * topic model. Default 10. */ public static final int DOCUMENT_MIN_WORD_FREQ = 10; /** - * Minumum number of words per document. + * Minumum number of words per document. Default 10. */ public static final int DOCUMENT_MIN_LENGTH = 10; diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java index 6a125144..07f6de37 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicFull.java @@ -36,6 +36,12 @@ public class TopicFull implements Model<ObjectId>, Serializable { @QueryIgnore(multi = true) private List<Sequence> sequences; + private Double avgRelevance; + + private Double varRelevance; + + private boolean dynamic = false; + private Date created; private Date modified; @@ -86,6 +92,30 @@ public class TopicFull implements Model<ObjectId>, Serializable { this.sequences = sequences; } + public Double getAvgRelevance() { + return avgRelevance; + } + + public void setAvgRelevance(Double avgRelevance) { + this.avgRelevance = avgRelevance; + } + + public Double getVarRelevance() { + return varRelevance; + } + + public void setVarRelevance(Double varRelevance) { + this.varRelevance = varRelevance; + } + + public boolean isDynamic() { + return dynamic; + } + + public void setDynamic(boolean dynamic) { + this.dynamic = dynamic; + } + public Date getCreated() { return created; } @@ -144,7 +174,8 @@ public class TopicFull implements Model<ObjectId>, Serializable { @Override public String toString() { return "TopicFull [id=" + id + ", name=" + name + ", index=" + index + ", words=" + words + ", sequences=" - + sequences + ", created=" + created + ", modified=" + modified + "]"; + + sequences + ", avgRelevance=" + avgRelevance + ", varRelevance=" + varRelevance + ", created=" + + created + ", modified=" + modified + "]"; } } -- GitLab