Skip to content
Snippets Groups Projects
Commit 6f6a13a4 authored by Eike Cochu
Browse files

added missing lines to topic name generation

parent af54b13c
No related branches found
No related tags found
Loading
......@@ -202,16 +202,16 @@ public class DTMAnalyzer extends Analyzer {
}
}
// collect top n words
if (!newSeqTopicWords.isEmpty()) {
Collections.sort(newSeqTopicWords, Comparator.reverseOrder());
// top n percent cutoff
if (seqPercentCutoff) {
final int fromIndex = (int) Math.round(newSeqTopicWords.size() * Constants.PERCENT_PROB);
newSeqTopicWords.subList(fromIndex, newSeqTopicWords.size());
newSeqTopicWords.subList(fromIndex, newSeqTopicWords.size()).clear();
}
// collect top words
topTopicWords.addAll(newSeqTopicWords.subList(0,
Math.min(newSeqTopicWords.size(), Constants.TOPIC_AUTO_NAMING_WORDS)));
}
......
......@@ -98,6 +98,9 @@ public class JGibbAnalyzer extends Analyzer {
estimator.init(options);
estimator.estimate();
final boolean seqRelativeCutoff = Constants.MINIMUM_RELATIVE_PROB > 0;
final boolean seqPercentCutoff = Constants.PERCENT_PROB < 1;
// read topic definitions and save
final File twords = new File(modelDir, NAME + ".twords");
......@@ -146,18 +149,24 @@ public class JGibbAnalyzer extends Analyzer {
final double maxLikeliness = maxLikelinesses[topicIndex];
final ArrayList<TopicWord> filteredTopicWords = new ArrayList<>(topic.getWords().size());
for (final TopicWord word : topic.getWords()) {
if (word.getLikeliness() >= Constants.MINIMUM_RELATIVE_PROB * maxLikeliness) {
if (!seqRelativeCutoff || word.getLikeliness() >= Constants.MINIMUM_RELATIVE_PROB * maxLikeliness) {
filteredTopicWords.add(word);
newWords.add(word.getWord());
}
}
topic.setWords(filteredTopicWords);
}
// sort topic words and generate topic name
for (final TopicFull topic : newTopics) {
Collections.sort(topic.getWords(), Collections.reverseOrder());
topic.setName(TopicFull.getNameFromWords(topic.getWords()));
if (!filteredTopicWords.isEmpty()) {
Collections.sort(filteredTopicWords, Collections.reverseOrder());
// top n percent cutoff
if (seqPercentCutoff) {
final int fromIndex = (int) Math.round(filteredTopicWords.size() * Constants.PERCENT_PROB);
filteredTopicWords.subList(fromIndex, filteredTopicWords.size()).clear();
}
topic.setName(TopicFull.getNameFromWords(filteredTopicWords));
}
}
// recreate topics and words
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment.