added missing public dir

added missing ui public dir; updated dtm analyzer, unfinished; updated count map

added missing public dir
5bd458e3 · Eike Cochu · f9994fea · 5bd458e3 · 5bd458e3 · 5bd458e3
Commit 5bd458e3 authored 9 years ago by Eike Cochu
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/DTMAnalyzer.java
@@ -9,7 +9,10 @@ import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

@@ -23,12 +26,14 @@ import de.vipra.cmd.file.DTMVocabulary;
 import de.vipra.cmd.file.FilebaseIndex;
 import de.vipra.util.Config;
 import de.vipra.util.Constants;
+import de.vipra.util.CountMap;
 import de.vipra.util.FileUtils;
 import de.vipra.util.StringUtils;
 import de.vipra.util.ex.ConfigException;
 import de.vipra.util.ex.DatabaseException;
 import de.vipra.util.model.ArticleFull;
 import de.vipra.util.model.Sequence;
+import de.vipra.util.model.Topic;
 import de.vipra.util.model.TopicFull;
 import de.vipra.util.model.TopicRef;
 import de.vipra.util.model.TopicWord;
@@ -161,47 +166,76 @@ public class DTMAnalyzer extends Analyzer {

 			// read topic definition files and create topics

+			Map<Word, Topic> topicWordMap = new HashMap<>(vocab.size());
 			List<TopicFull> newTopics = new ArrayList<>(Constants.K_TOPICS);
 			List<Word> newWords = new ArrayList<>(vocab.size());
 			int sequencesCount = sequences.size();

-			// for each topic
+			// for each topic file
 			for (int i = 0; i < Constants.K_TOPICS; i++) {
 				File seqFile = new File(outDirSeq, "topic-" + StringUtils.padNumber(i, 3) + "-var-e-log-prob.dat");

 				int lineCount = FileUtils.countLines(seqFile);
-				int wordsPerSequence = lineCount / sequencesCount;
+				int wordsCount = lineCount / sequencesCount;

-				if (wordsPerSequence * sequencesCount != lineCount) {
+				if (wordsCount * sequencesCount != lineCount) {
 					log.error("unexpected number of words per sequence");
 					continue;
 				}

+				// create new topic
+				TopicFull newTopic = new TopicFull();
+				List<Sequence> newSequences = new ArrayList<>(sequencesCount);
+				List<TopicWord> newTopicWords = new ArrayList<>(wordsCount);
+				newTopic.setSequences(newSequences);
+				newTopic.setWords(newTopicWords);
+				newTopics.add(newTopic);
+
 				in = new BufferedReader(new InputStreamReader(new FileInputStream(seqFile)));

 				// read file lines into word x sequence matrix
-				// gather maximum likeliness per sequence
-				double[] maxLikelinesses = new double[sequencesCount];
-				double[][] likelinesses = new double[wordsPerSequence][sequencesCount];
-				for (int idxWord = 0; idxWord < wordsPerSequence; idxWord++) {
+				// gather maximum likeliness per sequence and per word
+				double[] maxSeqLikelinesses = new double[sequencesCount];
+				double[] maxWordLikelinesses = new double[wordsCount];
+				double[][] likelinesses = new double[wordsCount][sequencesCount];
+				for (int idxWord = 0; idxWord < wordsCount; idxWord++) {
 					for (int idxSeq = 0; idxSeq < sequencesCount; idxSeq++) {
 						double likeliness = Double.parseDouble(in.readLine());
 						likelinesses[idxWord][idxSeq] = likeliness;
-						if (likeliness > maxLikelinesses[idxSeq])
-							maxLikelinesses[idxSeq] = likeliness;
+						if (likeliness > maxSeqLikelinesses[idxSeq])
+							maxSeqLikelinesses[idxSeq] = likeliness;
+						if (likeliness > maxWordLikelinesses[idxWord])
+							maxWordLikelinesses[idxWord] = likeliness;
 					}
 				}

 				in.close();

-				List<Sequence> newSequences = new ArrayList<>(sequencesCount);
+				// find maximum overall likeliness
+				double maxOverallLikeliness = 0;
+				for (double likeliness : maxSeqLikelinesses) {
+					if (likeliness > maxOverallLikeliness)
+						maxOverallLikeliness = likeliness;
+				}

+				// static topic
+				// most likely words form the static topic over all sequences
+				for (int idxWord = 0; idxWord < wordsCount; idxWord++) {
+					if (maxWordLikelinesses[idxWord] >= Constants.MINIMUM_RELATIVE_PROB * maxOverallLikeliness) {
+						Word newWord = new Word(vocab.get(idxWord));
+						newWords.add(newWord);
+						TopicWord newTopicWord = new TopicWord(newWord, maxWordLikelinesses[idxWord]);
+						newTopicWords.add(newTopicWord);
+					}
+				}
+
+				// dynamic topics
 				// go through each sequence and gather all words that are above
 				// the minimum relative word likeliness
 				for (int idxSeq = 0; idxSeq < sequencesCount; idxSeq++) {
-					double maxLikeliness = maxLikelinesses[idxSeq];
-					List<TopicWord> newSeqTopicWords = new ArrayList<>(wordsPerSequence);
-					for (int idxWord = 0; idxWord < wordsPerSequence; idxWord++) {
+					double maxLikeliness = maxSeqLikelinesses[idxSeq];
+					List<TopicWord> newSeqTopicWords = new ArrayList<>(wordsCount);
+					for (int idxWord = 0; idxWord < wordsCount; idxWord++) {
 						double likeliness = likelinesses[idxWord][idxSeq];
 						if (likeliness >= Constants.MINIMUM_RELATIVE_PROB * maxLikeliness) {
 							Word newWord = new Word(vocab.get(idxWord));
@@ -216,15 +250,7 @@ public class DTMAnalyzer extends Analyzer {
 					newSequence.setNumber(idxSeq);
 					newSequence.setWords(newSeqTopicWords);
 					newSequences.add(newSequence);
-
-					// TODO gather words for static topic
 				}
-
-				TopicFull newTopic = new TopicFull();
-				newTopic.setSequences(newSequences);
-				newTopics.add(newTopic);
-
-				// TODO add words to static topic
 			}

 			// recreate topics and words
@@ -246,17 +272,39 @@ public class DTMAnalyzer extends Analyzer {

 			// for each article in the model file
 			while ((line = in.readLine()) != null) {
-				List<TopicRef> newTopicRefs = new ArrayList<>();
-
-				// extract word:count pairs
+				// extract unique word ids and count
+				CountMap<Integer> countMap = new CountMap<>();
 				Matcher matcher = wordCountPattern.matcher(line);
+				double totalCount = 0;
 				while (matcher.find()) {
-					int idxWord = Integer.parseInt(matcher.group(1));
-					int wordCount = Integer.parseInt(matcher.group(2));
+					int count = Integer.parseInt(matcher.group(2));
+					countMap.count(Integer.parseInt(matcher.group(1)), count);
+					totalCount += count;
+				}

-					// TODO find topic/s of word, add as reference/s
+				// create list of topics refs referencing topics with counted
+				// occurrences, sum accepted topic word count
+				long reducedCount = 0;
+				List<TopicRef> newTopicRefs = new ArrayList<>(countMap.size());
+				for (Entry<Integer, Integer> entry : countMap.entrySet()) {
+					// check if topic above threshold
+					if ((entry.getValue() / totalCount) >= Constants.TOPIC_THRESHOLD) {
+						reducedCount += entry.getValue();
+						TopicFull topic = null;
+						// TODO find topic of this word
+						if (topic != null) {
+							TopicRef ref = new TopicRef();
+							ref.setCount(entry.getValue());
+							ref.setTopic(new Topic(topic.getId()));
+							newTopicRefs.add(ref);
+						}
+					}
 				}

+				// calculate each accepted topic share
+				for (TopicRef ref : newTopicRefs)
+					ref.setShare((double) ref.getCount() / reducedCount);
+
 				if (!newTopicRefs.isEmpty()) {
 					Collections.sort(newTopicRefs, Comparator.reverseOrder());

@@ -274,7 +322,7 @@ public class DTMAnalyzer extends Analyzer {
 				}
 			}

-			// TODO create topic references
+			in.close();

 		} catch (IOException | InterruptedException e) {
 			throw new AnalyzerException(e);

--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbAnalyzer.java
@@ -202,6 +202,10 @@ public class JGibbAnalyzer extends Analyzer {
 					if ((entry.getValue() / totalCount) >= Constants.TOPIC_THRESHOLD) {
 						reducedCount += entry.getValue();
 						TopicFull topic = newTopics.get(Integer.parseInt(entry.getKey()));
+						// TODO words with low relative likeliness are ignored.
+						// topic references from this file are possibly wrong.
+						// fix this by checking if the word is actually accepted
+						// by the referenced topic.
 						TopicRef ref = new TopicRef();
 						ref.setCount(entry.getValue());
 						ref.setTopic(new Topic(topic.getId()));
@@ -229,7 +233,9 @@ public class JGibbAnalyzer extends Analyzer {
 					}
 				}
 			}
+
 			in.close();
+
 		} catch (IOException e) {
 			throw new AnalyzerException(e);
 		}

--- a/vipra-ui/.gitignore
+++ b/vipra-ui/.gitignore
-node_modules/
-bower_components/
-public/
\ No newline at end of file
+/node_modules/
+/bower_components/
+/public/
--- a/vipra-ui/app/public/android-chrome-144x144.png
+++ b/vipra-ui/app/public/android-chrome-144x144.png
--- a/vipra-ui/app/public/android-chrome-192x192.png
+++ b/vipra-ui/app/public/android-chrome-192x192.png
--- a/vipra-ui/app/public/android-chrome-36x36.png
+++ b/vipra-ui/app/public/android-chrome-36x36.png
--- a/vipra-ui/app/public/android-chrome-48x48.png
+++ b/vipra-ui/app/public/android-chrome-48x48.png
--- a/vipra-ui/app/public/android-chrome-72x72.png
+++ b/vipra-ui/app/public/android-chrome-72x72.png
--- a/vipra-ui/app/public/android-chrome-96x96.png
+++ b/vipra-ui/app/public/android-chrome-96x96.png
--- a/vipra-ui/app/public/apple-touch-icon-114x114.png
+++ b/vipra-ui/app/public/apple-touch-icon-114x114.png
--- a/vipra-ui/app/public/apple-touch-icon-120x120.png
+++ b/vipra-ui/app/public/apple-touch-icon-120x120.png
--- a/vipra-ui/app/public/apple-touch-icon-144x144.png
+++ b/vipra-ui/app/public/apple-touch-icon-144x144.png
--- a/vipra-ui/app/public/apple-touch-icon-152x152.png
+++ b/vipra-ui/app/public/apple-touch-icon-152x152.png
--- a/vipra-ui/app/public/apple-touch-icon-180x180.png
+++ b/vipra-ui/app/public/apple-touch-icon-180x180.png
--- a/vipra-ui/app/public/apple-touch-icon-57x57.png
+++ b/vipra-ui/app/public/apple-touch-icon-57x57.png
--- a/vipra-ui/app/public/apple-touch-icon-60x60.png
+++ b/vipra-ui/app/public/apple-touch-icon-60x60.png
--- a/vipra-ui/app/public/apple-touch-icon-72x72.png
+++ b/vipra-ui/app/public/apple-touch-icon-72x72.png
--- a/vipra-ui/app/public/apple-touch-icon-76x76.png
+++ b/vipra-ui/app/public/apple-touch-icon-76x76.png
--- a/vipra-ui/app/public/apple-touch-icon-precomposed.png
+++ b/vipra-ui/app/public/apple-touch-icon-precomposed.png
--- a/vipra-ui/app/public/apple-touch-icon.png
+++ b/vipra-ui/app/public/apple-touch-icon.png