Skip to content
Snippets Groups Projects
Commit 5bd458e3 authored by Eike Cochu
Browse files

added missing public dir

added missing ui public dir
updated dtm analyzer, unfinished
updated count map
parent f9994fea
Branches
No related tags found
No related merge requests found
Showing changes with 85 additions and 31 deletions
......@@ -9,7 +9,10 @@ import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
......@@ -23,12 +26,14 @@ import de.vipra.cmd.file.DTMVocabulary;
import de.vipra.cmd.file.FilebaseIndex;
import de.vipra.util.Config;
import de.vipra.util.Constants;
import de.vipra.util.CountMap;
import de.vipra.util.FileUtils;
import de.vipra.util.StringUtils;
import de.vipra.util.ex.ConfigException;
import de.vipra.util.ex.DatabaseException;
import de.vipra.util.model.ArticleFull;
import de.vipra.util.model.Sequence;
import de.vipra.util.model.Topic;
import de.vipra.util.model.TopicFull;
import de.vipra.util.model.TopicRef;
import de.vipra.util.model.TopicWord;
......@@ -161,47 +166,76 @@ public class DTMAnalyzer extends Analyzer {
// read topic definition files and create topics
Map<Word, Topic> topicWordMap = new HashMap<>(vocab.size());
List<TopicFull> newTopics = new ArrayList<>(Constants.K_TOPICS);
List<Word> newWords = new ArrayList<>(vocab.size());
int sequencesCount = sequences.size();
// for each topic
// for each topic file
for (int i = 0; i < Constants.K_TOPICS; i++) {
File seqFile = new File(outDirSeq, "topic-" + StringUtils.padNumber(i, 3) + "-var-e-log-prob.dat");
int lineCount = FileUtils.countLines(seqFile);
int wordsPerSequence = lineCount / sequencesCount;
int wordsCount = lineCount / sequencesCount;
if (wordsPerSequence * sequencesCount != lineCount) {
if (wordsCount * sequencesCount != lineCount) {
log.error("unexpected number of words per sequence");
continue;
}
// create new topic
TopicFull newTopic = new TopicFull();
List<Sequence> newSequences = new ArrayList<>(sequencesCount);
List<TopicWord> newTopicWords = new ArrayList<>(wordsCount);
newTopic.setSequences(newSequences);
newTopic.setWords(newTopicWords);
newTopics.add(newTopic);
in = new BufferedReader(new InputStreamReader(new FileInputStream(seqFile)));
// read file lines into word x sequence matrix
// gather maximum likeliness per sequence
double[] maxLikelinesses = new double[sequencesCount];
double[][] likelinesses = new double[wordsPerSequence][sequencesCount];
for (int idxWord = 0; idxWord < wordsPerSequence; idxWord++) {
// gather maximum likeliness per sequence and per word
double[] maxSeqLikelinesses = new double[sequencesCount];
double[] maxWordLikelinesses = new double[wordsCount];
double[][] likelinesses = new double[wordsCount][sequencesCount];
for (int idxWord = 0; idxWord < wordsCount; idxWord++) {
for (int idxSeq = 0; idxSeq < sequencesCount; idxSeq++) {
double likeliness = Double.parseDouble(in.readLine());
likelinesses[idxWord][idxSeq] = likeliness;
if (likeliness > maxLikelinesses[idxSeq])
maxLikelinesses[idxSeq] = likeliness;
if (likeliness > maxSeqLikelinesses[idxSeq])
maxSeqLikelinesses[idxSeq] = likeliness;
if (likeliness > maxWordLikelinesses[idxWord])
maxWordLikelinesses[idxWord] = likeliness;
}
}
in.close();
List<Sequence> newSequences = new ArrayList<>(sequencesCount);
// find maximum overall likeliness
double maxOverallLikeliness = 0;
for (double likeliness : maxSeqLikelinesses) {
if (likeliness > maxOverallLikeliness)
maxOverallLikeliness = likeliness;
}
// static topic
// most likely words form the static topic over all sequences
for (int idxWord = 0; idxWord < wordsCount; idxWord++) {
if (maxWordLikelinesses[idxWord] >= Constants.MINIMUM_RELATIVE_PROB * maxOverallLikeliness) {
Word newWord = new Word(vocab.get(idxWord));
newWords.add(newWord);
TopicWord newTopicWord = new TopicWord(newWord, maxWordLikelinesses[idxWord]);
newTopicWords.add(newTopicWord);
}
}
// dynamic topics
// go through each sequence and gather all words that are above
// the minimum relative word likeliness
for (int idxSeq = 0; idxSeq < sequencesCount; idxSeq++) {
double maxLikeliness = maxLikelinesses[idxSeq];
List<TopicWord> newSeqTopicWords = new ArrayList<>(wordsPerSequence);
for (int idxWord = 0; idxWord < wordsPerSequence; idxWord++) {
double maxLikeliness = maxSeqLikelinesses[idxSeq];
List<TopicWord> newSeqTopicWords = new ArrayList<>(wordsCount);
for (int idxWord = 0; idxWord < wordsCount; idxWord++) {
double likeliness = likelinesses[idxWord][idxSeq];
if (likeliness >= Constants.MINIMUM_RELATIVE_PROB * maxLikeliness) {
Word newWord = new Word(vocab.get(idxWord));
......@@ -216,15 +250,7 @@ public class DTMAnalyzer extends Analyzer {
newSequence.setNumber(idxSeq);
newSequence.setWords(newSeqTopicWords);
newSequences.add(newSequence);
// TODO gather words for static topic
}
TopicFull newTopic = new TopicFull();
newTopic.setSequences(newSequences);
newTopics.add(newTopic);
// TODO add words to static topic
}
// recreate topics and words
......@@ -246,17 +272,39 @@ public class DTMAnalyzer extends Analyzer {
// for each article in the model file
while ((line = in.readLine()) != null) {
List<TopicRef> newTopicRefs = new ArrayList<>();
// extract word:count pairs
// extract unique word ids and count
CountMap<Integer> countMap = new CountMap<>();
Matcher matcher = wordCountPattern.matcher(line);
double totalCount = 0;
while (matcher.find()) {
int idxWord = Integer.parseInt(matcher.group(1));
int wordCount = Integer.parseInt(matcher.group(2));
int count = Integer.parseInt(matcher.group(2));
countMap.count(Integer.parseInt(matcher.group(1)), count);
totalCount += count;
}
// TODO find topic/s of word, add as reference/s
// create list of topics refs referencing topics with counted
// occurrences, sum accepted topic word count
long reducedCount = 0;
List<TopicRef> newTopicRefs = new ArrayList<>(countMap.size());
for (Entry<Integer, Integer> entry : countMap.entrySet()) {
// check if topic above threshold
if ((entry.getValue() / totalCount) >= Constants.TOPIC_THRESHOLD) {
reducedCount += entry.getValue();
TopicFull topic = null;
// TODO find topic of this word
if (topic != null) {
TopicRef ref = new TopicRef();
ref.setCount(entry.getValue());
ref.setTopic(new Topic(topic.getId()));
newTopicRefs.add(ref);
}
}
}
// calculate each accepted topic share
for (TopicRef ref : newTopicRefs)
ref.setShare((double) ref.getCount() / reducedCount);
if (!newTopicRefs.isEmpty()) {
Collections.sort(newTopicRefs, Comparator.reverseOrder());
......@@ -274,7 +322,7 @@ public class DTMAnalyzer extends Analyzer {
}
}
// TODO create topic references
in.close();
} catch (IOException | InterruptedException e) {
throw new AnalyzerException(e);
......
......@@ -202,6 +202,10 @@ public class JGibbAnalyzer extends Analyzer {
if ((entry.getValue() / totalCount) >= Constants.TOPIC_THRESHOLD) {
reducedCount += entry.getValue();
TopicFull topic = newTopics.get(Integer.parseInt(entry.getKey()));
// TODO words with low relative likeliness are ignored.
// topic references from this file are possibly wrong.
// fix this by checking if the word is actually accepted
// by the referenced topic.
TopicRef ref = new TopicRef();
ref.setCount(entry.getValue());
ref.setTopic(new Topic(topic.getId()));
......@@ -229,7 +233,9 @@ public class JGibbAnalyzer extends Analyzer {
}
}
}
in.close();
} catch (IOException e) {
throw new AnalyzerException(e);
}
......
node_modules/
bower_components/
public/
\ No newline at end of file
/node_modules/
/bower_components/
/public/
vipra-ui/app/public/android-chrome-144x144.png

6.11 KiB

vipra-ui/app/public/android-chrome-192x192.png

8.4 KiB

vipra-ui/app/public/android-chrome-36x36.png

1.47 KiB

vipra-ui/app/public/android-chrome-48x48.png

1.91 KiB

vipra-ui/app/public/android-chrome-72x72.png

2.83 KiB

vipra-ui/app/public/android-chrome-96x96.png

3.84 KiB

vipra-ui/app/public/apple-touch-icon-114x114.png

3.64 KiB

vipra-ui/app/public/apple-touch-icon-120x120.png

3.85 KiB

vipra-ui/app/public/apple-touch-icon-144x144.png

4.58 KiB

vipra-ui/app/public/apple-touch-icon-152x152.png

4.89 KiB

vipra-ui/app/public/apple-touch-icon-180x180.png

5.88 KiB

vipra-ui/app/public/apple-touch-icon-57x57.png

1.81 KiB

vipra-ui/app/public/apple-touch-icon-60x60.png

1.92 KiB

vipra-ui/app/public/apple-touch-icon-72x72.png

2.27 KiB

vipra-ui/app/public/apple-touch-icon-76x76.png

2.34 KiB

vipra-ui/app/public/apple-touch-icon-precomposed.png

6.91 KiB

vipra-ui/app/public/apple-touch-icon.png

5.88 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment