Skip to content
Snippets Groups Projects
  • Eike Cochu's avatar
    085a1da8
    updated rest service · 085a1da8
    Eike Cochu authored
    added response shortcuts
    batch creating topics and words on import
    using words themselves as database ids to skip prior creation
    added words resource
    added rest service pagination
    set default for topics and topic words in topic modeling
    moved wordmap to proper package
    added 10-articles test json
    added exception handling to rest requests
    added jackson to utils package for annotations
    removed jsonfield, jsonignore annotations, already exist in jackson
    085a1da8
    History
    updated rest service
    Eike Cochu authored
    added response shortcuts
    batch creating topics and words on import
    using words themselves as database ids to skip prior creation
    added words resource
    added rest service pagination
    set default for topics and topic words in topic modeling
    moved wordmap to proper package
    added 10-articles test json
    added exception handling to rest requests
    added jackson to utils package for annotations
    removed jsonfield, jsonignore annotations, already exist in jackson
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
LDAAnalyzer.java 1.78 KiB
package de.vipra.cmd.lda;

import java.util.List;

import de.vipra.cmd.ex.LDAAnalyzerException;
import de.vipra.util.Config;
import de.vipra.util.Config.Key;
import de.vipra.util.Constants;
import de.vipra.util.ConvertStream;
import de.vipra.util.WordMap;
import de.vipra.util.model.TopicFull;
import de.vipra.util.model.TopicRef;

/**
 * Base class for LDA topic modeling back ends. Concrete analyzers supply the
 * initialization, the analysis run and the two result streams; the static
 * factory {@link #getAnalyzer(Config, WordMap)} selects and initializes an
 * implementation based on the configuration.
 */
public abstract class LDAAnalyzer {

	/** Name handed in by the concrete analyzer at construction time. */
	private final String name;

	/**
	 * Stores the analyzer name.
	 * 
	 * @param name
	 *            name reported by {@link #getName()}
	 */
	protected LDAAnalyzer(String name) {
		this.name = name;
	}

	/**
	 * @return the name this analyzer was constructed with
	 */
	public String getName() {
		return name;
	}

	/**
	 * Initializes this analyzer. The factory method calls this once, right
	 * after instantiation and before the analyzer is returned. What exactly is
	 * set up is defined by the implementing class.
	 * 
	 * @param config
	 *            configuration passed through from the factory
	 * @param wordMap
	 *            word map passed through from the factory
	 * @throws LDAAnalyzerException
	 */
	public abstract void init(Config config, WordMap wordMap) throws LDAAnalyzerException;

	/**
	 * Runs the analysis. Behavior is defined by the implementing class.
	 * 
	 * @throws LDAAnalyzerException
	 */
	public abstract void analyze() throws LDAAnalyzerException;

	/**
	 * Provides the topic definitions as a converting stream that is read from
	 * the topic definition file. A topic definition usually is a list of words,
	 * each assigned to the topic with a certain likelihood.
	 * 
	 * @return topic definition stream
	 * @throws LDAAnalyzerException
	 */
	public abstract ConvertStream<TopicFull> getTopicDefinitions() throws LDAAnalyzerException;

	/**
	 * Provides the topic references as a converting stream of lists. Topic
	 * modeling normally outputs topics for each word of each document; those
	 * references are what this stream delivers.
	 * 
	 * @return stream of lists of topic references per document (ordered by
	 *         index)
	 * @throws LDAAnalyzerException
	 */
	public abstract ConvertStream<List<TopicRef>> getTopics() throws LDAAnalyzerException;

	/**
	 * Creates the analyzer selected by the {@link Key#ANALYZER} configuration
	 * entry and initializes it with the given configuration and word map.
	 * Every value currently resolves to {@link JGibbLDAAnalyzer}.
	 * 
	 * @param config
	 *            configuration to read the analyzer selection from, also
	 *            passed to {@link #init(Config, WordMap)}
	 * @param wordMap
	 *            word map passed to {@link #init(Config, WordMap)}
	 * @return the initialized analyzer
	 * @throws LDAAnalyzerException
	 */
	public static LDAAnalyzer getAnalyzer(Config config, WordMap wordMap) throws LDAAnalyzerException {
		final LDAAnalyzer selected;
		switch (Constants.Analyzer.fromString(config.getString(Key.ANALYZER))) {
			// all labels share one branch as long as JGibb is the only back end
			case JGIBB:
			case DEFAULT:
			default:
				selected = new JGibbLDAAnalyzer();
		}
		selected.init(config, wordMap);
		return selected;
	}

}