Skip to content
Snippets Groups Projects
Commit 6f6a13a4 authored by Eike Cochu
Browse files

added missing lines to topic name generation

parent af54b13c
Branches
No related tags found
No related merge requests found
...@@ -202,16 +202,16 @@ public class DTMAnalyzer extends Analyzer { ...@@ -202,16 +202,16 @@ public class DTMAnalyzer extends Analyzer {
} }
} }
// collect top n words
if (!newSeqTopicWords.isEmpty()) { if (!newSeqTopicWords.isEmpty()) {
Collections.sort(newSeqTopicWords, Comparator.reverseOrder()); Collections.sort(newSeqTopicWords, Comparator.reverseOrder());
// top n percent cutoff // top n percent cutoff
if (seqPercentCutoff) { if (seqPercentCutoff) {
final int fromIndex = (int) Math.round(newSeqTopicWords.size() * Constants.PERCENT_PROB); final int fromIndex = (int) Math.round(newSeqTopicWords.size() * Constants.PERCENT_PROB);
newSeqTopicWords.subList(fromIndex, newSeqTopicWords.size()); newSeqTopicWords.subList(fromIndex, newSeqTopicWords.size()).clear();
} }
// collect top words
topTopicWords.addAll(newSeqTopicWords.subList(0, topTopicWords.addAll(newSeqTopicWords.subList(0,
Math.min(newSeqTopicWords.size(), Constants.TOPIC_AUTO_NAMING_WORDS))); Math.min(newSeqTopicWords.size(), Constants.TOPIC_AUTO_NAMING_WORDS)));
} }
......
...@@ -98,6 +98,9 @@ public class JGibbAnalyzer extends Analyzer { ...@@ -98,6 +98,9 @@ public class JGibbAnalyzer extends Analyzer {
estimator.init(options); estimator.init(options);
estimator.estimate(); estimator.estimate();
final boolean seqRelativeCutoff = Constants.MINIMUM_RELATIVE_PROB > 0;
final boolean seqPercentCutoff = Constants.PERCENT_PROB < 1;
// read topic definitions and save // read topic definitions and save
final File twords = new File(modelDir, NAME + ".twords"); final File twords = new File(modelDir, NAME + ".twords");
...@@ -146,18 +149,24 @@ public class JGibbAnalyzer extends Analyzer { ...@@ -146,18 +149,24 @@ public class JGibbAnalyzer extends Analyzer {
final double maxLikeliness = maxLikelinesses[topicIndex]; final double maxLikeliness = maxLikelinesses[topicIndex];
final ArrayList<TopicWord> filteredTopicWords = new ArrayList<>(topic.getWords().size()); final ArrayList<TopicWord> filteredTopicWords = new ArrayList<>(topic.getWords().size());
for (final TopicWord word : topic.getWords()) { for (final TopicWord word : topic.getWords()) {
if (word.getLikeliness() >= Constants.MINIMUM_RELATIVE_PROB * maxLikeliness) { if (!seqRelativeCutoff || word.getLikeliness() >= Constants.MINIMUM_RELATIVE_PROB * maxLikeliness) {
filteredTopicWords.add(word); filteredTopicWords.add(word);
newWords.add(word.getWord()); newWords.add(word.getWord());
} }
} }
topic.setWords(filteredTopicWords); topic.setWords(filteredTopicWords);
}
// sort topic words and generate topic name if (!filteredTopicWords.isEmpty()) {
for (final TopicFull topic : newTopics) { Collections.sort(filteredTopicWords, Collections.reverseOrder());
Collections.sort(topic.getWords(), Collections.reverseOrder());
topic.setName(TopicFull.getNameFromWords(topic.getWords())); // top n percent cutoff
if (seqPercentCutoff) {
final int fromIndex = (int) Math.round(filteredTopicWords.size() * Constants.PERCENT_PROB);
filteredTopicWords.subList(fromIndex, filteredTopicWords.size()).clear();
}
topic.setName(TopicFull.getNameFromWords(filteredTopicWords));
}
} }
// recreate topics and words // recreate topics and words
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment