From 98076556c7f1f33096e35ca33ee218c27baf26f1 Mon Sep 17 00:00:00 2001
From: Eike Cochu <eike@cochu.com>
Date: Sat, 2 Jan 2016 15:01:43 +0100
Subject: [PATCH] updated

---
 TODO                                          |  3 +
 .../main/java/de/vipra/cmd/file/Filebase.java |  2 -
 .../java/de/vipra/cmd/file/FilebaseIndex.java | 14 +++-
 .../de/vipra/cmd/file/FilebaseVocabulary.java | 10 ++-
 .../java/de/vipra/cmd/file/JGibbFilebase.java | 41 ++++++++----
 .../java/de/vipra/cmd/file/LdacFilebase.java  |  4 +-
 .../de/vipra/cmd/lda/JGibbLDAAnalyzer.java    | 58 ++++++++++++++++-
 .../java/de/vipra/cmd/lda/LDAAnalyzer.java    |  9 ++-
 .../de/vipra/cmd/lda/LdacLDAAnalyzer.java     | 23 -------
 .../de/vipra/cmd/option/ImportCommand.java    | 36 +++++-----
 .../de/vipra/cmd/option/StatsCommand.java     |  2 +-
 .../main/java/de/vipra/util/Constants.java    |  4 +-
 .../de/vipra/util/model/ArticleStats.java     |  3 +
 .../main/java/de/vipra/util/model/Topic.java  | 65 +++++++++++++++++++
 .../java/de/vipra/util/model/TopicWord.java   | 31 +++++++++
 15 files changed, 236 insertions(+), 69 deletions(-)
 delete mode 100644 vipra-cmd/src/main/java/de/vipra/cmd/lda/LdacLDAAnalyzer.java
 create mode 100644 vipra-util/src/main/java/de/vipra/util/model/Topic.java
 create mode 100644 vipra-util/src/main/java/de/vipra/util/model/TopicWord.java

diff --git a/TODO b/TODO
index 1da64172..286b0293 100644
--- a/TODO
+++ b/TODO
@@ -2,6 +2,9 @@ cmd
   ☐ implement delete operation
   ☐ implement filebase remove
   ☐ implement elasticsearch indexing
+  ☐ allow other document input formats
+  ☐ do not read whole file into memory
+  ☐ on save topics: retain topic names?
 
 rest
   ☐ implement etag caching
\ No newline at end of file
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java
index 02dcda7b..2ea001f7 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java
@@ -66,8 +66,6 @@ public abstract class Filebase implements Closeable {
 	public static Filebase getFilebase(Config config) throws FilebaseException, ConfigException {
 		File dataDir = config.getDataDirectory();
 		switch (Constants.Analyzer.fromString(config.getString(Key.ANALYZER))) {
-			case LDAC:
-				return new LdacFilebase(dataDir);
 			case JGIBB:
 			case DEFAULT:
 			default:
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java
index 516a5caa..dae34993 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseIndex.java
@@ -4,12 +4,13 @@ import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
 
 import de.vipra.util.Constants;
 import de.vipra.util.FileUtils;
 
-public class FilebaseIndex implements Closeable {
+public class FilebaseIndex implements Closeable, Iterable<String> {
 
 	private final File file;
 	private final List<String> index;
@@ -23,7 +24,7 @@ public class FilebaseIndex implements Closeable {
 		}
 	}
 
-	private void write() throws IOException {
+	public void write() throws IOException {
 		FileUtils.writeLines(file, Constants.FB_ENCODING.name(), index, null, false);
 	}
 
@@ -40,6 +41,10 @@ public class FilebaseIndex implements Closeable {
 		return index.indexOf(id);
 	}
 
+	public String get(int i) {
+		return index.get(i);
+	}
+
 	public boolean remove(String id) {
 		return index.remove(id);
 	}
@@ -49,4 +54,9 @@ public class FilebaseIndex implements Closeable {
 		write();
 	}
 
+	@Override
+	public Iterator<String> iterator() {
+		return index.iterator();
+	}
+
 }
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseVocabulary.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseVocabulary.java
index 227fa33c..a910d941 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseVocabulary.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseVocabulary.java
@@ -4,12 +4,13 @@ import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
 
 import de.vipra.util.Constants;
 import de.vipra.util.FileUtils;
 
-public class FilebaseVocabulary implements Closeable {
+public class FilebaseVocabulary implements Closeable, Iterable<String> {
 
 	private File file;
 	private List<String> vocables;
@@ -23,7 +24,7 @@ public class FilebaseVocabulary implements Closeable {
 		}
 	}
 
-	private void write() throws IOException {
+	public void write() throws IOException {
 		FileUtils.writeLines(file, Constants.FB_ENCODING.name(), vocables, null, false);
 	}
 
@@ -49,4 +50,9 @@ public class FilebaseVocabulary implements Closeable {
 		return vocables.indexOf(word);
 	}
 
+	@Override
+	public Iterator<String> iterator() {
+		return vocables.iterator();
+	}
+
 }
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java
index b92fcf9a..1195e099 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java
@@ -17,6 +17,8 @@ public class JGibbFilebase extends Filebase {
 	private final FilebaseVocabulary vocab;
 	private final List<Article> articles;
 
+	private final int bufferMaxSize = 100;
+
 	public JGibbFilebase(File dataDir) throws FilebaseException {
 		super(dataDir, "jgibb");
 		this.modelFile = getModelFile();
@@ -26,11 +28,19 @@ public class JGibbFilebase extends Filebase {
 	}
 
 	@Override
-	public void add(Article article) {
+	public void add(Article article) throws FilebaseException {
 		String[] words = article.getProcessedText().getText().split("\\s+");
 		vocab.addVocabulary(words);
 		index.add(article.getId());
 		articles.add(article);
+
+		if (articles.size() >= bufferMaxSize) {
+			try {
+				write();
+			} catch (IOException e) {
+				throw new FilebaseException(e);
+			}
+		}
 	}
 
 	@Override
@@ -40,20 +50,23 @@ public class JGibbFilebase extends Filebase {
 
 	@Override
 	public void write() throws IOException {
-		boolean linesep = modelFile.exists();
-		RandomAccessFile raf = new RandomAccessFile(modelFile, "rw");
-
-		// write articles
-		raf.seek(raf.length());
-		for (Article a : articles) {
-			if (linesep)
-				raf.writeBytes(System.lineSeparator());
-			else
-				linesep = true;
-			raf.writeBytes(a.getProcessedText().getText());
-		}
+		if (!articles.isEmpty()) {
+			boolean linesep = modelFile.exists();
+			RandomAccessFile raf = new RandomAccessFile(modelFile, "rw");
+
+			// write articles
+			raf.seek(raf.length());
+			for (Article a : articles) {
+				if (linesep)
+					raf.writeBytes(System.lineSeparator());
+				else
+					linesep = true;
+				raf.writeBytes(a.getProcessedText().getText());
+			}
 
-		raf.close();
+			raf.close();
+			articles.clear();
+		}
 	}
 
 }
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java
index 67b210a9..2360d4af 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/LdacFilebase.java
@@ -15,13 +15,13 @@ public class LdacFilebase extends Filebase {
 	}
 
 	@Override
-	public void add(Article article) {
+	public void add(Article article) throws FilebaseException {
 		// TODO Auto-generated method stub
 
 	}
 
 	@Override
-	public void remove(String id) {
+	public void remove(String id) throws FilebaseException {
 		throw new NotImplementedException();
 	}
 
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java
index 41e21180..ffebc4d5 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java
@@ -1,13 +1,25 @@
 package de.vipra.cmd.lda;
 
+import java.io.BufferedReader;
 import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import de.vipra.cmd.ex.LDAAnalyzerException;
+import de.vipra.cmd.model.Article;
 import de.vipra.util.Config;
+import de.vipra.util.FileUtils;
 import de.vipra.util.ex.ConfigException;
+import de.vipra.util.ex.DatabaseException;
+import de.vipra.util.model.Topic;
+import de.vipra.util.model.TopicWord;
+import de.vipra.util.service.DatabaseService;
 import jgibblda.Estimator;
 import jgibblda.Inferencer;
 import jgibblda.LDACmdOption;
@@ -38,7 +50,7 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer {
 
 		modelDir = new File(dataDir, "jgibb");
 		options.dir = modelDir.getAbsolutePath();
-		options.estc = new File(modelDir, "model-final.tassign").exists();
+		options.estc = new File(modelDir, "jgibb.tassign").exists();
 		options.est = !options.estc;
 
 		modelFile = new File(modelDir, "jgibb");
@@ -53,7 +65,6 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer {
 			throw new LDAAnalyzerException("model file does not exist: " + modelFile.getAbsolutePath());
 		}
 		estimate();
-		// inference();
 	}
 
 	private void estimate() {
@@ -62,10 +73,53 @@ public class JGibbLDAAnalyzer extends LDAAnalyzer {
 		estimator.estimate();
 	}
 
+	@SuppressWarnings("unused")
 	private void inference() {
 		Inferencer inferencer = new Inferencer();
 		inferencer.init(options);
 		Model newModel = inferencer.inference();
 	}
 
+	private List<Topic> readTopics() throws IOException {
+		// jgibb.twords layout: one unindented header line per topic, then its tab-indented "word value" lines
+		List<Topic> topics = new ArrayList<>();
+		List<TopicWord> topicWords = null;
+		for (String line : FileUtils.readFile(new File(modelDir, "jgibb.twords"))) {
+			if (line.trim().isEmpty())
+				continue; // a blank line is neither a topic header nor a word
+			if (!line.startsWith("\t")) {
+				topicWords = new ArrayList<>(); // header line: open the next topic ...
+				topics.add(new Topic(topicWords)); // ... and register it now so the last topic is not dropped
+			} else if (topicWords != null) { // word lines that precede any header are ignored
+				String[] parts = line.trim().split("\\s+");
+				topicWords.add(new TopicWord(parts[0], Double.parseDouble(parts[1])));
+			}
+		}
+		return topics;
+	}
+
+	@Override
+	public void save(DatabaseService<Article> dbArticles, DatabaseService<Topic> dbTopics) throws LDAAnalyzerException {
+		// Replaces the stored topics with those read from the model output; dbArticles is not used yet.
+		try {
+			List<Topic> topics = readTopics();
+			// recreate topics in database
+			dbTopics.drop();
+			for (Topic topic : topics) {
+				dbTopics.createSingle(topic);
+			}
+
+			// read document topics; try-with-resources closes the reader even when reading fails
+			try (BufferedReader reader = new BufferedReader(
+					new InputStreamReader(new FileInputStream(new File(modelDir, "jgibb.tassign"))))) {
+				String line;
+				while ((line = reader.readLine()) != null) {
+					String[] parts = line.trim().split("\\s+"); // TODO: assignments are split but not stored yet
+				}
+			}
+		} catch (IOException | DatabaseException e) {
+			throw new LDAAnalyzerException(e);
+		}
+	}
+
 }
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java
index b7285a72..c352eb89 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java
@@ -1,8 +1,11 @@
 package de.vipra.cmd.lda;
 
 import de.vipra.cmd.ex.LDAAnalyzerException;
+import de.vipra.cmd.model.Article;
 import de.vipra.util.Config;
 import de.vipra.util.Constants;
+import de.vipra.util.model.Topic;
+import de.vipra.util.service.DatabaseService;
 import de.vipra.util.Config.Key;
 
 public abstract class LDAAnalyzer {
@@ -21,12 +24,12 @@ public abstract class LDAAnalyzer {
 
 	public abstract void analyze() throws LDAAnalyzerException;
 
+	public abstract void save(DatabaseService<Article> dbArticles, DatabaseService<Topic> dbTopics)
+			throws LDAAnalyzerException;
+
 	public static LDAAnalyzer getAnalyzer(Config config) throws LDAAnalyzerException {
 		LDAAnalyzer analyzer = null;
 		switch (Constants.Analyzer.fromString(config.getString(Key.ANALYZER))) {
-			case LDAC:
-				analyzer = new LdacLDAAnalyzer();
-				break;
 			case JGIBB:
 			case DEFAULT:
 			default:
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LdacLDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LdacLDAAnalyzer.java
deleted file mode 100644
index 0b65ea36..00000000
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LdacLDAAnalyzer.java
+++ /dev/null
@@ -1,23 +0,0 @@
-package de.vipra.cmd.lda;
-
-import de.vipra.cmd.ex.LDAAnalyzerException;
-import de.vipra.util.Config;
-
-public class LdacLDAAnalyzer extends LDAAnalyzer {
-
-	protected LdacLDAAnalyzer() {
-		super("lda-c Analyzer");
-	}
-
-	@Override
-	public void init(Config config) throws LDAAnalyzerException {
-		// TODO Auto-generated method stub
-
-	}
-
-	@Override
-	public void analyze() throws LDAAnalyzerException {
-		// TODO Auto-generated method stub
-	}
-
-}
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java
index a61a8da3..5b6c8226 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java
@@ -6,7 +6,6 @@ import java.io.FileReader;
 import java.io.FilenameFilter;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.List;
 
 import org.json.simple.JSONArray;
 import org.json.simple.JSONObject;
@@ -27,6 +26,7 @@ import de.vipra.util.Constants;
 import de.vipra.util.StringUtils;
 import de.vipra.util.Timer;
 import de.vipra.util.model.ArticleStats;
+import de.vipra.util.model.Topic;
 import de.vipra.util.service.DatabaseService;
 
 public class ImportCommand implements Command {
@@ -38,6 +38,7 @@ public class ImportCommand implements Command {
 	private JSONParser parser = new JSONParser();
 	private Config config;
 	private DatabaseService<Article> dbArticles;
+	private DatabaseService<Topic> dbTopics;
 	private Filebase filebase;
 	private Processor preprocessor;
 	private LDAAnalyzer analyzer;
@@ -82,7 +83,7 @@ public class ImportCommand implements Command {
 	 * @return
 	 * @throws ImportException
 	 */
-	Article importArticle(JSONObject obj) throws ImportException {
+	void importArticle(JSONObject obj) throws ImportException {
 		out.debug("importing \"" + StringUtils.ellipsize(obj.get("title").toString(), 80) + "\"");
 		Article article = new Article();
 		article.fromJSON(obj);
@@ -99,8 +100,6 @@ public class ImportCommand implements Command {
 
 			// add article to filebase
 			filebase.add(article);
-
-			return article;
 		} catch (Exception e) {
 			throw new ImportException(e, article.getId());
 		}
@@ -116,21 +115,22 @@ public class ImportCommand implements Command {
 	 * @throws ImportException
 	 * @throws Exception
 	 */
-	private List<Article> importFile(File file)
-			throws FileNotFoundException, IOException, ParseException, ImportException {
+	private long importFile(File file) throws FileNotFoundException, IOException, ParseException, ImportException {
 		Object data = parser.parse(new FileReader(file));
 
-		List<Article> articles = new ArrayList<Article>();
+		long imported = 0;
 
 		if (data instanceof JSONArray) {
 			for (Object object : (JSONArray) data) {
-				articles.add(importArticle((JSONObject) object));
+				importArticle((JSONObject) object);
+				imported++;
 			}
 		} else if (data instanceof JSONObject) {
-			articles.add(importArticle((JSONObject) data));
+			importArticle((JSONObject) data);
+			imported++;
 		}
 
-		return articles;
+		return imported;
 	}
 
 	@Override
@@ -138,6 +138,7 @@ public class ImportCommand implements Command {
 		try {
 			config = Config.getConfig();
 			dbArticles = DatabaseService.getDatabaseService(config, Constants.Collection.ARTICLES, Article.class);
+			dbTopics = DatabaseService.getDatabaseService(config, Constants.Collection.TOPICS, Topic.class);
 			filebase = Filebase.getFilebase(config);
 			preprocessor = Processor.getPreprocessor(config);
 			analyzer = LDAAnalyzer.getAnalyzer(config);
@@ -151,25 +152,28 @@ public class ImportCommand implements Command {
 
 			// import files into database and filebase
 			out.info("file import");
-			List<Article> articles = new ArrayList<>();
+			long imported = 0;
 			for (File file : files) {
-				articles.addAll(importFile(file));
+				imported += importFile(file);
 			}
 			long durImport = timer.lap();
 
 			// write filebase
 			out.info("writing file index");
 			filebase.close();
-			long durIndex = timer.lap();
+			timer.lap();
 
 			// do topic modeling
 			out.info("topic modeling");
 			analyzer.analyze();
 			long durAnalyze = timer.lap();
 
-			out.info("imported " + articles.size() + " " + (articles.size() == 1 ? "article" : "articles"));
-			out.info("import: " + StringUtils.timeString(durImport) + ", analyze: " + StringUtils.timeString(durAnalyze)
-					+ ", reindex: " + StringUtils.timeString(durIndex));
+			out.info("saving topic models");
+			analyzer.save(dbArticles, dbTopics);
+
+			out.info("imported " + imported + " " + (imported == 1 ? "article" : "articles"));
+			out.info("import: " + StringUtils.timeString(durImport) + ", analyze: "
+					+ StringUtils.timeString(durAnalyze));
 		} catch (Exception e) {
 			throw new ExecutionException(e);
 		}
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java
index 6cf6800b..53d7a738 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/StatsCommand.java
@@ -7,7 +7,7 @@ public class StatsCommand implements Command {
 	@Override
 	public void run() throws ExecutionException {
 		// TODO Auto-generated method stub
-		
+
 	}
 
 }
diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java
index 34e8043a..0594c27d 100644
--- a/vipra-util/src/main/java/de/vipra/util/Constants.java
+++ b/vipra-util/src/main/java/de/vipra/util/Constants.java
@@ -30,7 +30,8 @@ public class Constants {
 			"then", "there", "these", "they", "this", "to", "was", "will", "with");
 
 	public static enum Collection {
-		ARTICLES("articles");
+		ARTICLES("articles"),
+		TOPICS("topics");
 
 		public final String name;
 
@@ -67,7 +68,6 @@ public class Constants {
 	}
 
 	public static enum Analyzer {
-		LDAC("ldac"),
 		JGIBB("jgibb"),
 		DEFAULT(JGIBB);
 
diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java
index 2b4cc36c..fca89724 100644
--- a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java
+++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java
@@ -47,6 +47,7 @@ public class ArticleStats implements BsonDocument {
 		stats.setWordCount(words.length);
 		Map<String, TermFrequency> uniqueWords = new HashMap<>();
 		long maxFrequency = 0;
+
 		// loop and count unique words
 		// also remember maximum frequency
 		for (String word : words) {
@@ -60,10 +61,12 @@ public class ArticleStats implements BsonDocument {
 			}
 			uniqueWords.put(word, tf);
 		}
+
 		// normalize frequencies
 		for (Map.Entry<String, TermFrequency> entry : uniqueWords.entrySet()) {
 			entry.getValue().normalizeTermFrequency(maxFrequency);
 		}
+
 		stats.setUniqueWordCount(uniqueWords.size());
 		stats.setUniqueWords(uniqueWords);
 		return stats;
diff --git a/vipra-util/src/main/java/de/vipra/util/model/Topic.java b/vipra-util/src/main/java/de/vipra/util/model/Topic.java
new file mode 100644
index 00000000..168a4c94
--- /dev/null
+++ b/vipra-util/src/main/java/de/vipra/util/model/Topic.java
@@ -0,0 +1,65 @@
+package de.vipra.util.model;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+import org.bson.Document;
+
+public class Topic extends Model { // a topic: a list of weighted words plus a list of names
+
+	private List<String> names; // not set by either constructor; assigned only through setNames
+	private List<TopicWord> words; // the words that make up this topic
+
+	public Topic() {}
+
+	public Topic(List<TopicWord> words) {
+		this.words = words; // keeps the caller's list reference, no defensive copy
+	}
+
+	public List<String> getNames() {
+		return names;
+	}
+
+	public void setNames(List<String> names) {
+		this.names = names;
+	}
+
+	public List<TopicWord> getWords() {
+		return words;
+	}
+
+	public void setWords(List<TopicWord> words) {
+		this.words = words;
+	}
+
+	@Override
+	public String getType() {
+		return Topic.class.getSimpleName().toLowerCase(); // lower-cased simple class name
+	}
+
+	@Override
+	public void fromDocument(Document document) {
+		// TODO not implemented: nothing is read from the document yet
+
+	}
+
+	@Override
+	public Document toDocument() {
+		// TODO not implemented. NOTE(review): JGibbLDAAnalyzer.save() passes topics to createSingle - confirm null is handled
+		return null;
+	}
+
+	@Override
+	public void fromFile(File file) throws IOException {
+		// TODO not implemented: the file is not read
+
+	}
+
+	@Override
+	public String toFileString() {
+		// TODO not implemented: always returns null
+		return null;
+	}
+
+}
diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java b/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java
new file mode 100644
index 00000000..22fe8ab0
--- /dev/null
+++ b/vipra-util/src/main/java/de/vipra/util/model/TopicWord.java
@@ -0,0 +1,31 @@
+package de.vipra.util.model;
+
+public class TopicWord { // one word of a topic together with a numeric weight
+
+	private String word; // the word itself
+	private double likeliness; // numeric value for the word; JGibbLDAAnalyzer.readTopics parses it from a jgibb.twords line
+
+	public TopicWord() {}
+
+	public TopicWord(String word, double likeliness) {
+		this.word = word;
+		this.likeliness = likeliness;
+	}
+
+	public String getWord() {
+		return word;
+	}
+
+	public void setWord(String word) {
+		this.word = word;
+	}
+
+	public double getLikeliness() {
+		return likeliness;
+	}
+
+	public void setLikeliness(double likeliness) {
+		this.likeliness = likeliness;
+	}
+
+}
-- 
GitLab