diff --git a/jgibblda/.classpath b/jgibblda/.classpath
new file mode 100644
index 0000000000000000000000000000000000000000..03f79961c13cd63e03ca08883e00ccda4e98765c
--- /dev/null
+++ b/jgibblda/.classpath
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="src" path="src"/>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="output" path="target/classes"/>
+</classpath>
diff --git a/jgibblda/.gitignore b/jgibblda/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..09e3bc9b241c477ea341af9ee029becad0c2148c
--- /dev/null
+++ b/jgibblda/.gitignore
@@ -0,0 +1,2 @@
+/bin/
+/target/
diff --git a/jgibblda/.project b/jgibblda/.project
new file mode 100644
index 0000000000000000000000000000000000000000..cf1ec4545d2f6fa8d591550806411223a5dcc415
--- /dev/null
+++ b/jgibblda/.project
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>JGibbLDA</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.m2e.core.maven2Builder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.m2e.core.maven2Nature</nature>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+	</natures>
+</projectDescription>
diff --git a/jgibblda/.settings/org.eclipse.core.resources.prefs b/jgibblda/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 0000000000000000000000000000000000000000..99f26c0203a7844de00dbfc56e6a35d8ed3c022c
--- /dev/null
+++ b/jgibblda/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,2 @@
+eclipse.preferences.version=1
+encoding/<project>=UTF-8
diff --git a/jgibblda/.settings/org.eclipse.jdt.core.prefs b/jgibblda/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000000000000000000000000000000000000..71aa314cccdc8c5610bda147357512c5a43a4d3f
--- /dev/null
+++ b/jgibblda/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,101 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.annotation.inheritNullAnnotations=disabled
+org.eclipse.jdt.core.compiler.annotation.missingNonNullByDefaultAnnotation=ignore
+org.eclipse.jdt.core.compiler.annotation.nonnull=org.eclipse.jdt.annotation.NonNull
+org.eclipse.jdt.core.compiler.annotation.nonnullbydefault=org.eclipse.jdt.annotation.NonNullByDefault
+org.eclipse.jdt.core.compiler.annotation.nullable=org.eclipse.jdt.annotation.Nullable
+org.eclipse.jdt.core.compiler.annotation.nullanalysis=disabled
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.7
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=ignore
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.autoboxing=ignore
+org.eclipse.jdt.core.compiler.problem.comparingIdentical=ignore
+org.eclipse.jdt.core.compiler.problem.deadCode=ignore
+org.eclipse.jdt.core.compiler.problem.deprecation=ignore
+org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled
+org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled
+org.eclipse.jdt.core.compiler.problem.discouragedReference=ignore
+org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.problem.explicitlyClosedAutoCloseable=ignore
+org.eclipse.jdt.core.compiler.problem.fallthroughCase=ignore
+org.eclipse.jdt.core.compiler.problem.fatalOptionalError=disabled
+org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore
+org.eclipse.jdt.core.compiler.problem.finalParameterBound=ignore
+org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=ignore
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=ignore
+org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=ignore
+org.eclipse.jdt.core.compiler.problem.includeNullInfoFromAsserts=disabled
+org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=ignore
+org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=ignore
+org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore
+org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore
+org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=ignore
+org.eclipse.jdt.core.compiler.problem.missingDefaultCase=ignore
+org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingEnumCaseDespiteDefault=disabled
+org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotationForInterfaceMethodImplementation=enabled
+org.eclipse.jdt.core.compiler.problem.missingSerialVersion=ignore
+org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore
+org.eclipse.jdt.core.compiler.problem.noEffectAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=ignore
+org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore
+org.eclipse.jdt.core.compiler.problem.nonnullParameterAnnotationDropped=warning
+org.eclipse.jdt.core.compiler.problem.nullAnnotationInferenceConflict=error
+org.eclipse.jdt.core.compiler.problem.nullReference=ignore
+org.eclipse.jdt.core.compiler.problem.nullSpecViolation=error
+org.eclipse.jdt.core.compiler.problem.nullUncheckedConversion=warning
+org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=ignore
+org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore
+org.eclipse.jdt.core.compiler.problem.potentiallyUnclosedCloseable=ignore
+org.eclipse.jdt.core.compiler.problem.rawTypeReference=ignore
+org.eclipse.jdt.core.compiler.problem.redundantNullAnnotation=warning
+org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore
+org.eclipse.jdt.core.compiler.problem.redundantSpecificationOfTypeArguments=ignore
+org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore
+org.eclipse.jdt.core.compiler.problem.reportMethodCanBePotentiallyStatic=ignore
+org.eclipse.jdt.core.compiler.problem.reportMethodCanBeStatic=ignore
+org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled
+org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=ignore
+org.eclipse.jdt.core.compiler.problem.suppressOptionalErrors=disabled
+org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled
+org.eclipse.jdt.core.compiler.problem.syntacticNullAnalysisForFields=disabled
+org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore
+org.eclipse.jdt.core.compiler.problem.typeParameterHiding=ignore
+org.eclipse.jdt.core.compiler.problem.unavoidableGenericTypeProblems=enabled
+org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=ignore
+org.eclipse.jdt.core.compiler.problem.unclosedCloseable=ignore
+org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore
+org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=ignore
+org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore
+org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore
+org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled
+org.eclipse.jdt.core.compiler.problem.unusedExceptionParameter=ignore
+org.eclipse.jdt.core.compiler.problem.unusedImport=ignore
+org.eclipse.jdt.core.compiler.problem.unusedLabel=ignore
+org.eclipse.jdt.core.compiler.problem.unusedLocal=ignore
+org.eclipse.jdt.core.compiler.problem.unusedObjectAllocation=ignore
+org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore
+org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled
+org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=ignore
+org.eclipse.jdt.core.compiler.problem.unusedTypeParameter=ignore
+org.eclipse.jdt.core.compiler.problem.unusedWarningToken=ignore
+org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=ignore
+org.eclipse.jdt.core.compiler.source=1.7
diff --git a/jgibblda/.settings/org.eclipse.m2e.core.prefs b/jgibblda/.settings/org.eclipse.m2e.core.prefs
new file mode 100644
index 0000000000000000000000000000000000000000..f897a7f1cb2389f85fe6381425d29f0a9866fb65
--- /dev/null
+++ b/jgibblda/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
diff --git a/jgibblda/pom.xml b/jgibblda/pom.xml
new file mode 100644
index 0000000000000000000000000000000000000000..321ed5c26cb8a7c82edd5a980787899e3621d4e2
--- /dev/null
+++ b/jgibblda/pom.xml
@@ -0,0 +1,19 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<groupId>JGibbLDA</groupId>
+	<artifactId>JGibbLDA</artifactId>
+	<version>0.0.1-SNAPSHOT</version>
+	<properties>
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+		<maven.compiler.target>1.7</maven.compiler.target>
+		<maven.compiler.source>1.7</maven.compiler.source>
+	</properties>
+	<dependencies>
+		<dependency>
+			<groupId>args4j</groupId>
+			<artifactId>args4j</artifactId>
+			<version>2.0.6</version>
+		</dependency>
+	</dependencies>
+</project>
\ No newline at end of file
diff --git a/jgibblda/src/jgibblda/Constants.java b/jgibblda/src/jgibblda/Constants.java
new file mode 100644
index 0000000000000000000000000000000000000000..93b104457a1df0037d9f8dbd29771019b8419ab4
--- /dev/null
+++ b/jgibblda/src/jgibblda/Constants.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+package jgibblda;
+
+/** Shared constants: buffer sizes and integer model-status codes. */
+public class Constants {
+	public static final long BUFFER_SIZE_LONG = 1_000_000L;
+	public static final short BUFFER_SIZE_SHORT = 512;
+	public static final int MODEL_STATUS_UNKNOWN = 0; // NOTE(review): presumably "no mode selected" - confirm against Model
+	public static final int MODEL_STATUS_EST = 1;     // presumably the "est" run mode - confirm
+	public static final int MODEL_STATUS_ESTC = 2;    // presumably the "estc" run mode - confirm
+	public static final int MODEL_STATUS_INF = 3;     // presumably the "inf" run mode - confirm
+}
diff --git a/jgibblda/src/jgibblda/Conversion.java b/jgibblda/src/jgibblda/Conversion.java
new file mode 100644
index 0000000000000000000000000000000000000000..879871b40f47e5bc0fbdd7c434687e380c449e9f
--- /dev/null
+++ b/jgibblda/src/jgibblda/Conversion.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+package jgibblda;
+
+public class Conversion {
+	/** Left-pads {@code number} with zeros to at least {@code width} characters; a minus sign stays in front of the zeros. */
+	public static String ZeroPad( int number, int width ) {
+		String digits = Long.toString(Math.abs((long) number)); // long arithmetic so Integer.MIN_VALUE has an absolute value
+		StringBuilder result = new StringBuilder(number < 0 ? "-" : "");
+		for (int i = result.length() + digits.length(); i < width; i++) {
+			result.append('0');
+		}
+		return result.append(digits).toString();
+	}
+}
diff --git a/jgibblda/src/jgibblda/Dictionary.java b/jgibblda/src/jgibblda/Dictionary.java
new file mode 100644
index 0000000000000000000000000000000000000000..842e3f66711153ef93341b747d032a50a292b6df
--- /dev/null
+++ b/jgibblda/src/jgibblda/Dictionary.java
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+package jgibblda;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.StringTokenizer;
+
+public class Dictionary {
+	public Map<String,Integer> word2id; // word -> id
+	public Map<Integer, String> id2word; // id -> word
+
+	//--------------------------------------------------
+	// constructors
+	//--------------------------------------------------
+	/** Creates an empty dictionary. */
+	public Dictionary(){
+		word2id = new HashMap<String, Integer>();
+		id2word = new HashMap<Integer, String>();
+	}
+
+	//---------------------------------------------------
+	// get/set methods
+	//---------------------------------------------------
+	/** Returns the word stored under {@code id}, or null when there is no such entry. */
+	public String getWord(int id){
+		return id2word.get(id);
+	}
+
+	/** Returns the id stored for {@code word}, or null when there is no such entry. */
+	public Integer getID (String word){
+		return word2id.get(word);
+	}
+
+	//----------------------------------------------------
+	// checking methods
+	//----------------------------------------------------
+	/** Checks whether this dictionary contains the given word. */
+	public boolean contains(String word){
+		return word2id.containsKey(word);
+	}
+
+	/** Checks whether this dictionary contains the given id. */
+	public boolean contains(int id){
+		return id2word.containsKey(id);
+	}
+
+	//---------------------------------------------------
+	// manipulating methods
+	//---------------------------------------------------
+	/**
+	 * Adds a word to this dictionary unless it is already present.
+	 *
+	 * @param word the word to add
+	 * @return the id of the word, newly assigned or already existing
+	 */
+	public int addWord(String word){
+		if (contains(word)){
+			return getID(word);
+		}
+		// a new word gets the current number of known words as its id
+		int id = word2id.size();
+		word2id.put(word, id);
+		id2word.put(id, word);
+		return id;
+	}
+
+	//---------------------------------------------------
+	// I/O methods
+	//---------------------------------------------------
+	/**
+	 * Reads a word map from a UTF-8 text file into this dictionary.
+	 * The first line holds the number of entries; each following line is "word id"
+	 * separated by whitespace. Lines without exactly two tokens are skipped.
+	 *
+	 * @param wordMapFile path of the file to read
+	 * @return true on success; false if the file is unreadable, truncated or malformed
+	 */
+	public boolean readWordMap(String wordMapFile){
+		// try-with-resources closes the reader on every path, including failures
+		try (BufferedReader reader = new BufferedReader(new InputStreamReader(
+				new FileInputStream(wordMapFile), "UTF-8"))){
+			//read the number of words
+			String line = reader.readLine();
+			int nwords = Integer.parseInt(line);
+			//read map
+			for (int i = 0; i < nwords; ++i){
+				line = reader.readLine();
+				if (line == null){
+					// the file ended before the declared number of entries
+					System.out.println("Error while reading dictionary:unexpected end of file " + wordMapFile);
+					return false;
+				}
+				StringTokenizer tknr = new StringTokenizer(line, " \t\n\r");
+				if (tknr.countTokens() != 2) continue;
+				String word = tknr.nextToken();
+				int intID = Integer.parseInt(tknr.nextToken());
+				id2word.put(intID, word);
+				word2id.put(word, intID);
+			}
+			return true;
+		}
+		catch (Exception e){
+			System.out.println("Error while reading dictionary:" + e.getMessage());
+			e.printStackTrace();
+			return false;
+		}
+	}
+
+	/**
+	 * Writes this dictionary to a UTF-8 text file: the number of words on the
+	 * first line, then one "word id" pair per line.
+	 *
+	 * @param wordMapFile path of the file to write
+	 * @return true on success, false if an error occurred
+	 */
+	public boolean writeWordMap(String wordMapFile){
+		// try-with-resources closes the writer even when a write fails
+		try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
+				new FileOutputStream(wordMapFile), "UTF-8"))){
+			//write number of words
+			writer.write(word2id.size() + "\n");
+			//write word to id
+			for (Map.Entry<String, Integer> entry : word2id.entrySet()){
+				writer.write(entry.getKey() + " " + entry.getValue() + "\n");
+			}
+			return true;
+		}
+		catch (Exception e){
+			System.out.println("Error while writing word map " + e.getMessage());
+			e.printStackTrace();
+			return false;
+		}
+	}
+}
diff --git a/jgibblda/src/jgibblda/Document.java b/jgibblda/src/jgibblda/Document.java
new file mode 100644
index 0000000000000000000000000000000000000000..679d568a98691f5a82f98c8bc1dca5b7b100e7a8
--- /dev/null
+++ b/jgibblda/src/jgibblda/Document.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+package jgibblda;
+
+import java.util.Vector;
+
+public class Document {
+
+	//----------------------------------------------------
+	//Instance Variables
+	//----------------------------------------------------
+	/** Word ids in document order; left null by the no-arg constructor. */
+	public int [] words;
+	public String rawStr; // "" unless a constructor was given a raw string
+	public int length;    // set by every constructor to the size of words (0 when words is null)
+
+	//----------------------------------------------------
+	//Constructors
+	//----------------------------------------------------
+	/** Creates an empty document: no word array, empty raw string, length 0. */
+	public Document(){
+		this.length = 0;
+		this.rawStr = "";
+		this.words = null;
+	}
+
+	/** Creates a document with {@code length} zero-initialised word slots and an empty raw string. */
+	public Document(int length){
+		this.words = new int[length];
+		this.rawStr = "";
+		this.length = length;
+	}
+
+	/** Copies the first {@code length} entries of {@code words}; the raw string is empty. */
+	public Document(int length, int [] words){
+		this(length, words, "");
+	}
+
+	/** Copies the first {@code length} entries of {@code words} and stores {@code rawStr}. */
+	public Document(int length, int [] words, String rawStr){
+		int [] copy = new int[length];
+		int pos = 0;
+		while (pos < length){
+			copy[pos] = words[pos];
+			pos++;
+		}
+		this.words = copy;
+		this.rawStr = rawStr;
+		this.length = length;
+	}
+
+	/** Copies the word ids held in {@code doc}; the raw string is empty. */
+	public Document(Vector<Integer> doc){
+		this(doc, "");
+	}
+
+	/** Copies the word ids held in {@code doc} and stores {@code rawStr}. */
+	public Document(Vector<Integer> doc, String rawStr){
+		int size = doc.size();
+		int [] copy = new int[size];
+		for (int pos = 0; pos < size; pos++){
+			copy[pos] = doc.get(pos);
+		}
+		this.words = copy;
+		this.rawStr = rawStr;
+		this.length = size;
+	}
+}
diff --git a/jgibblda/src/jgibblda/Estimator.java b/jgibblda/src/jgibblda/Estimator.java
new file mode 100644
index 0000000000000000000000000000000000000000..24f9b85efcd5ad7c4260ace6b9c614fa76f82693
--- /dev/null
+++ b/jgibblda/src/jgibblda/Estimator.java
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+package jgibblda;
+
+import java.io.File;
+import java.util.Vector;
+
+public class Estimator {
+	// output model
+	protected Model trnModel;
+	LDACmdOption option;
+
+	/** Stores the options and initialises trnModel (new model plus word-map file for option.est, Model.initEstimatedModel for option.estc); returns false if that initialisation reports failure. */
+	public boolean init(LDACmdOption option){
+		this.option = option;
+		trnModel = new Model();
+		if (option.est){
+			if (!trnModel.initNewModel(option))
+				return false;
+			trnModel.data.localDict.writeWordMap(option.dir + File.separator + option.wordMapFileName);
+		}
+		else if (option.estc){
+			if (!trnModel.initEstimatedModel(option))
+				return false;
+		}
+		return true;
+	}
+
+	/** Runs trnModel.niters Gibbs sweeps over every word of every document, saving a model every option.savestep iterations (if positive) and finally "model-final". */
+	public void estimate(){
+		System.out.println("Sampling " + trnModel.niters + " iteration!");
+
+		int lastIter = trnModel.liter;
+		// inclusive upper bound: exactly niters sweeps run after lastIter, as the message above announces
+		for (trnModel.liter = lastIter + 1; trnModel.liter <= trnModel.niters + lastIter; trnModel.liter++){
+			System.out.println("Iteration " + trnModel.liter + " ...");
+			// for all z_i
+			for (int m = 0; m < trnModel.M; m++){
+				for (int n = 0; n < trnModel.data.docs[m].length; n++){
+					// z_i = z[m][n], sampled from p(z_i|z_-i, w)
+					trnModel.z[m].set(n, sampling(m, n));
+				}// end for each word
+			}// end for each document
+
+			if (option.savestep > 0 && trnModel.liter % option.savestep == 0){
+				System.out.println("Saving the model at iteration " + trnModel.liter + " ...");
+				computeTheta();
+				computePhi();
+				trnModel.saveModel("model-" + Conversion.ZeroPad(trnModel.liter, 5));
+			}
+		}// end iterations
+
+		System.out.println("Gibbs sampling completed!\n");
+		System.out.println("Saving the final model!\n");
+		computeTheta();
+		computePhi();
+		trnModel.liter--; // the loop leaves liter one past the last completed iteration
+		trnModel.saveModel("model-final");
+	}
+
+	/**
+	 * Do sampling
+	 * @param m document number
+	 * @param n word number
+	 * @return topic id
+	 */
+	public int sampling(int m, int n){
+		// remove z_i from the count variable
+		int topic = trnModel.z[m].get(n);
+		int w = trnModel.data.docs[m].words[n];
+
+		trnModel.nw[w][topic] -= 1;
+		trnModel.nd[m][topic] -= 1;
+		trnModel.nwsum[topic] -= 1;
+		trnModel.ndsum[m] -= 1;
+
+		double Vbeta = trnModel.V * trnModel.beta;
+		double Kalpha = trnModel.K * trnModel.alpha;
+
+		//do multinomial sampling via cumulative method
+		for (int k = 0; k < trnModel.K; k++){
+			trnModel.p[k] = (trnModel.nw[w][k] + trnModel.beta)/(trnModel.nwsum[k] + Vbeta) *
+					(trnModel.nd[m][k] + trnModel.alpha)/(trnModel.ndsum[m] + Kalpha);
+		}
+
+		// cumulate multinomial parameters
+		for (int k = 1; k < trnModel.K; k++){
+			trnModel.p[k] += trnModel.p[k - 1];
+		}
+
+		// scaled sample because of unnormalized p[]
+		double u = Math.random() * trnModel.p[trnModel.K - 1];
+
+		for (topic = 0; topic < trnModel.K; topic++){
+			if (trnModel.p[topic] > u) //sample topic w.r.t distribution p
+				break;
+		}
+		// no cumulative bucket exceeded u (possible when p[] sums to 0 or NaN): clamp instead of indexing past the count arrays
+		if (topic == trnModel.K)
+			topic = trnModel.K - 1;
+
+		// add newly estimated z_i to count variables
+		trnModel.nw[w][topic] += 1;
+		trnModel.nd[m][topic] += 1;
+		trnModel.nwsum[topic] += 1;
+		trnModel.ndsum[m] += 1;
+
+		return topic;
+	}
+
+	/** Fills trnModel.theta[m][k] with (nd[m][k] + alpha) / (ndsum[m] + K * alpha) for every document m and topic k. */
+	public void computeTheta(){
+		for (int m = 0; m < trnModel.M; m++){
+			for (int k = 0; k < trnModel.K; k++){
+				trnModel.theta[m][k] = (trnModel.nd[m][k] + trnModel.alpha) / (trnModel.ndsum[m] + trnModel.K * trnModel.alpha);
+			}
+		}
+	}
+
+	/** Fills trnModel.phi[k][w] with (nw[w][k] + beta) / (nwsum[k] + V * beta) for every topic k and word w. */
+	public void computePhi(){
+		for (int k = 0; k < trnModel.K; k++){
+			for (int w = 0; w < trnModel.V; w++){
+				trnModel.phi[k][w] = (trnModel.nw[w][k] + trnModel.beta) / (trnModel.nwsum[k] + trnModel.V * trnModel.beta);
+			}
+		}
+	}
+}
diff --git a/jgibblda/src/jgibblda/Inferencer.java b/jgibblda/src/jgibblda/Inferencer.java
new file mode 100644
index 0000000000000000000000000000000000000000..2248db6627760f98f3e5d5b471d5f787ed98b03e
--- /dev/null
+++ b/jgibblda/src/jgibblda/Inferencer.java
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+package jgibblda;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+import java.util.Vector;
+
+public class Inferencer {
+	// Train model
+	public Model trnModel;
+	public Dictionary globalDict;
+	private LDACmdOption option;
+
+	private Model newModel;
+	public int niters = 100;
+
+	//-----------------------------------------------------
+	// Init method
+	//-----------------------------------------------------
+	/**
+	 * Initialises the trained model through Model.initEstimatedModel, takes its dictionary
+	 * as the global dictionary and computes the trained theta and phi.
+	 *
+	 * @param option parsed command-line options
+	 * @return false if the trained model could not be initialised, true otherwise
+	 */
+	public boolean init(LDACmdOption option){
+		this.option = option;
+		trnModel = new Model();
+
+		if (!trnModel.initEstimatedModel(option))
+			return false;
+
+		globalDict = trnModel.data.localDict;
+		computeTrnTheta();
+		computeTrnPhi();
+
+		return true;
+	}
+
+	/**
+	 * Runs inference on the given data set against the trained model.
+	 *
+	 * @param newData data set to infer topics for
+	 * @return the new model after niters sampling sweeps, with theta and phi computed
+	 */
+	public Model inference( LDADataset newData){
+		System.out.println("init new model");
+		Model newModel = new Model();
+
+		newModel.initNewModel(option, newData, trnModel);
+		this.newModel = newModel;
+
+		System.out.println("Sampling " + niters + " iteration for inference!");
+		runInferenceSweeps();
+		System.out.println("Gibbs sampling for inference completed!");
+
+		computeNewTheta();
+		computeNewPhi();
+		newModel.liter--;
+		return this.newModel;
+	}
+
+	/** Runs inference on documents given as strings; the data set is built with LDADataset.readDataSet and the global dictionary. */
+	public Model inference(String [] strs){
+		LDADataset dataset = LDADataset.readDataSet(strs, globalDict);
+		return inference(dataset);
+	}
+
+	/**
+	 * Runs inference on the data set selected by the stored options and saves it as dfile + "." + modelName.
+	 * @return the new model, or null if Model.initNewModel reported failure
+	 */
+	public Model inference(){
+		newModel = new Model();
+		if (!newModel.initNewModel(option, trnModel)) return null;
+
+		System.out.println("Sampling " + niters + " iteration for inference!");
+		runInferenceSweeps();
+		System.out.println("Gibbs sampling for inference completed!");
+		System.out.println("Saving the inference outputs!");
+
+		computeNewTheta();
+		computeNewPhi();
+		newModel.liter--;
+		newModel.saveModel(newModel.dfile + "." + newModel.modelName);
+
+		return newModel;
+	}
+
+	/** Runs niters Gibbs sweeps over every word of every document of the newModel field. */
+	private void runInferenceSweeps(){
+		for (newModel.liter = 1; newModel.liter <= niters; newModel.liter++){
+			// for all newz_i
+			for (int m = 0; m < newModel.M; ++m){
+				for (int n = 0; n < newModel.data.docs[m].length; n++){
+					// newz_i = newz[m][n], sampled from p(z_i|z_-i,w)
+					newModel.z[m].set(n, infSampling(m, n));
+				}
+			}//end foreach new doc
+		}// end iterations
+	}
+
+	/**
+	 * do sampling for inference
+	 * @param m document number
+	 * @param n index of the word within document m
+	 * @return the newly sampled topic id
+	 */
+	protected int infSampling(int m, int n){
+		// remove z_i from the count variables
+		int topic = newModel.z[m].get(n);
+		int _w = newModel.data.docs[m].words[n];
+		int w = newModel.data.lid2gid.get(_w); // id used to index the trained model's counts
+		newModel.nw[_w][topic] -= 1;
+		newModel.nd[m][topic] -= 1;
+		newModel.nwsum[topic] -= 1;
+		newModel.ndsum[m] -= 1;
+
+		double Vbeta = trnModel.V * newModel.beta;
+		double Kalpha = trnModel.K * newModel.alpha;
+
+		// do multinomial sampling via cumulative method
+		for (int k = 0; k < newModel.K; k++){
+			newModel.p[k] = (trnModel.nw[w][k] + newModel.nw[_w][k] + newModel.beta)/(trnModel.nwsum[k] +  newModel.nwsum[k] + Vbeta) *
+					(newModel.nd[m][k] + newModel.alpha)/(newModel.ndsum[m] + Kalpha);
+		}
+
+		// cumulate multinomial parameters
+		for (int k = 1; k < newModel.K; k++){
+			newModel.p[k] += newModel.p[k - 1];
+		}
+
+		// scaled sample because of unnormalized p[]
+		double u = Math.random() * newModel.p[newModel.K - 1];
+		for (topic = 0; topic < newModel.K; topic++){
+			if (newModel.p[topic] > u)
+				break;
+		}
+		// no cumulative bucket exceeded u (possible when p[] sums to 0 or NaN): clamp instead of indexing past the count arrays
+		if (topic == newModel.K)
+			topic = newModel.K - 1;
+
+		// add newly estimated z_i to count variables
+		newModel.nw[_w][topic] += 1;
+		newModel.nd[m][topic] += 1;
+		newModel.nwsum[topic] += 1;
+		newModel.ndsum[m] += 1;
+
+		return topic;
+	}
+
+	/** Fills newModel.theta[m][k] with (nd[m][k] + alpha) / (ndsum[m] + K * alpha) using the new model's counts. */
+	protected void computeNewTheta(){
+		for (int m = 0; m < newModel.M; m++){
+			for (int k = 0; k < newModel.K; k++){
+				newModel.theta[m][k] = (newModel.nd[m][k] + newModel.alpha) / (newModel.ndsum[m] + newModel.K * newModel.alpha);
+			}//end foreach topic
+		}//end foreach new document
+	}
+
+	/** Fills newModel.phi[k][_w] from the combined trained and new counts, for every local word id that lid2gid maps. */
+	protected void computeNewPhi(){
+		for (int k = 0; k < newModel.K; k++){
+			for (int _w = 0; _w < newModel.V; _w++){
+				Integer id = newModel.data.lid2gid.get(_w);
+				if (id != null){
+					// the denominator adds the trained and the new topic totals, the same combination infSampling uses
+					newModel.phi[k][_w] = (trnModel.nw[id][k] + newModel.nw[_w][k] + newModel.beta) / (trnModel.nwsum[k] + newModel.nwsum[k] + trnModel.V * newModel.beta);
+				}
+			}//end foreach word
+		}// end foreach topic
+	}
+
+	/** Fills trnModel.theta[m][k] with (nd[m][k] + alpha) / (ndsum[m] + K * alpha) using the trained model's counts. */
+	protected void computeTrnTheta(){
+		for (int m = 0; m < trnModel.M; m++){
+			for (int k = 0; k < trnModel.K; k++){
+				trnModel.theta[m][k] = (trnModel.nd[m][k] + trnModel.alpha) / (trnModel.ndsum[m] + trnModel.K * trnModel.alpha);
+			}
+		}
+	}
+
+	/** Fills trnModel.phi[k][w] with (nw[w][k] + beta) / (nwsum[k] + V * beta) using the trained model's counts. */
+	protected void computeTrnPhi(){
+		for (int k = 0; k < trnModel.K; k++){
+			for (int w = 0; w < trnModel.V; w++){
+				trnModel.phi[k][w] = (trnModel.nw[w][k] + trnModel.beta) / (trnModel.nwsum[k] + trnModel.V * trnModel.beta);
+			}
+		}
+	}
+}
diff --git a/jgibblda/src/jgibblda/LDA.java b/jgibblda/src/jgibblda/LDA.java
new file mode 100644
index 0000000000000000000000000000000000000000..c6ca2a26cf2d3e3f7163eeb25044f637dc410ec9
--- /dev/null
+++ b/jgibblda/src/jgibblda/LDA.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+package jgibblda;
+
+import org.kohsuke.args4j.*;
+
+public class LDA { // command-line entry point: parses LDACmdOption and runs estimation or inference
+	
+	public static void main(String args[]){ // -est/-estc take precedence over -inf; no arguments prints the usage
+		LDACmdOption option = new LDACmdOption();
+		CmdLineParser parser = new CmdLineParser(option);
+		
+		try {
+			if (args.length == 0){
+				showHelp(parser);
+				return;
+			}
+			
+			parser.parseArgument(args);
+			
+			if (option.est || option.estc){
+				Estimator estimator = new Estimator();
+				estimator.init(option);
+				estimator.estimate();
+			}
+			else if (option.inf){
+				Inferencer inferencer = new Inferencer();
+				inferencer.init(option);
+				
+				Model newModel = inferencer.inference();
+			
+				for (int i = 0; i < newModel.phi.length; ++i){
+					//phi: K * V, columns are indexed by the new dataset's LOCAL word ids; print at most the first 10
+					System.out.println("-----------------------\ntopic" + i  + " : ");
+					for (int j = 0; j < Math.min(10, newModel.phi[i].length); ++j){
+						System.out.println(newModel.data.localDict.getWord(j) + "\t" + newModel.phi[i][j]);
+					}
+				}
+			}
+		}
+		catch (CmdLineException cle){
+			System.out.println("Command line error: " + cle.getMessage());
+			showHelp(parser);
+			return;
+		}
+		catch (Exception e){
+			System.out.println("Error in main: " + e.getMessage());
+			e.printStackTrace();
+			return;
+		}
+	}
+	
+	public static void showHelp(CmdLineParser parser){ // prints a one-line synopsis followed by the parser's option list
+		System.out.println("LDA [options ...] [arguments...]");
+		parser.printUsage(System.out);
+	}
+	
+}
diff --git a/jgibblda/src/jgibblda/LDACmdOption.java b/jgibblda/src/jgibblda/LDACmdOption.java
new file mode 100644
index 0000000000000000000000000000000000000000..bc330beef00a417f8b69cd72b1a24815926ee347
--- /dev/null
+++ b/jgibblda/src/jgibblda/LDACmdOption.java
@@ -0,0 +1,48 @@
+package jgibblda;
+
+import org.kohsuke.args4j.*;
+
+public class LDACmdOption { // holder for the command-line options; LDA.main hands an instance to args4j's CmdLineParser
+	
+	@Option(name="-est", usage="Specify whether we want to estimate model from scratch")
+	public boolean est = false;
+	
+	@Option(name="-estc", usage="Specify whether we want to continue the last estimation")
+	public boolean estc = false;
+	
+	@Option(name="-inf", usage="Specify whether we want to do inference")
+	public boolean inf = true; // already true by default, so LDA.main infers whenever neither -est nor -estc is set
+	
+	@Option(name="-dir", usage="Specify directory")
+	public String dir = "";
+	
+	@Option(name="-dfile", usage="Specify data file")
+	public String dfile = "";
+	
+	@Option(name="-model", usage="Specify the model name")
+	public String modelName = "";
+	
+	@Option(name="-alpha", usage="Specify alpha")
+	public double alpha = -1.0; // negative means "not given": Model.init then falls back to 50.0 / K
+	
+	@Option(name="-beta", usage="Specify beta")
+	public double beta = -1.0; // negative means "not given": Model.init then leaves the model's beta unchanged
+	
+	@Option(name="-ntopics", usage="Specify the number of topics")
+	public int K = 100;
+	
+	@Option(name="-niters", usage="Specify the number of iterations")
+	public int niters = 1000;
+	
+	@Option(name="-savestep", usage="Specify the number of steps to save the model since the last save")
+	public int savestep = 100;
+	
+	@Option(name="-twords", usage="Specify the number of most likely words to be printed for each topic")
+	public int twords = 100;
+	
+	@Option(name="-withrawdata", usage="Specify whether we include raw data in the input")
+	public boolean withrawdata = false;
+	
+	@Option(name="-wordmap", usage="Specify the wordmap file")
+	public String wordMapFileName = "wordmap.txt";
+}
diff --git a/jgibblda/src/jgibblda/LDADataset.java b/jgibblda/src/jgibblda/LDADataset.java
new file mode 100644
index 0000000000000000000000000000000000000000..d56f96b3b961782466b6384d5ed40dd867e3abbf
--- /dev/null
+++ b/jgibblda/src/jgibblda/LDADataset.java
@@ -0,0 +1,277 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+package jgibblda;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Vector;
+
+public class LDADataset {
+	//---------------------------------------------------------------
+	// Instance Variables
+	//---------------------------------------------------------------
+	
+	public Dictionary localDict;			// local dictionary	
+	public Document [] docs; 		// a list of documents	
+	public int M; 			 		// number of documents
+	public int V;			 		// number of words
+	
+	// map from local coordinates (id) to global ones 
+	// null if the global dictionary is not set
+	public Map<Integer, Integer> lid2gid; 
+	
+	//link to a global dictionary (optional), null for train data, not null for test data
+	public Dictionary globalDict;	 		
+	
+	//--------------------------------------------------------------
+	// Constructor
+	//--------------------------------------------------------------
+	public LDADataset(){ // empty dataset: no document array, no global dictionary
+		localDict = new Dictionary();
+		M = 0;
+		V = 0;
+		docs = null;
+	
+		globalDict = null;
+		lid2gid = null;
+	}
+	
+	public LDADataset(int M){ // dataset with M empty document slots and no global dictionary
+		localDict = new Dictionary();
+		this.M = M;
+		this.V = 0;
+		docs = new Document[M];	
+		
+		globalDict = null;
+		lid2gid = null;
+	}
+	
+	public LDADataset(int M, Dictionary globalDict){ // M empty slots; words are later mapped to globalDict ids via lid2gid
+		localDict = new Dictionary();	
+		this.M = M;
+		this.V = 0;
+		docs = new Document[M];	
+		
+		this.globalDict = globalDict;
+		lid2gid = new HashMap<Integer, Integer>();
+	}
+	
+	//-------------------------------------------------------------
+	//Public Instance Methods
+	//-------------------------------------------------------------
+	/**
+	 * set the document at the index idx if 0 <= idx < M; any other index is silently ignored
+	 * @param doc document to be set
+	 * @param idx index in the document array
+	 */	
+	public void setDoc(Document doc, int idx){
+		if (0 <= idx && idx < M){
+			docs[idx] = doc;
+		}
+	}
+	/**
+	 * tokenize str, register its words and store the document at idx if 0 <= idx < M (otherwise do nothing)
+	 * @param str one document: words separated by space, tab or newline; with a global dictionary, unknown words are dropped
+	 * @param idx index in the document array
+	 */
+	public void setDoc(String str, int idx){
+		if (0 <= idx && idx < M){
+			String [] words = str.split("[ \\t\\n]");
+			
+			Vector<Integer> ids = new Vector<Integer>();
+			
+			for (String word : words){
+				if (word.isEmpty()) continue; // repeated or leading separators yield empty tokens, which are not words
+				int _id = localDict.word2id.size(); // id the word receives if it is new to the local dictionary
+				if (localDict.contains(word))		
+					_id = localDict.getID(word);
+								
+				if (globalDict != null){
+					//get the global id					
+					Integer id = globalDict.getID(word);
+					//System.out.println(id);
+					
+					if (id != null){
+						localDict.addWord(word);
+						
+						lid2gid.put(_id, id);
+						ids.add(_id);
+					}
+					else { //not in global dictionary
+						//do nothing currently
+					}
+				}
+				else {
+					localDict.addWord(word);
+					ids.add(_id);
+				}
+			}
+			
+			Document doc = new Document(ids, str);
+			docs[idx] = doc;
+			V = localDict.word2id.size();			
+		}
+	}
+	//---------------------------------------------------------------
+	// I/O methods
+	//---------------------------------------------------------------
+	
+	/**
+	 *  read a dataset from the UTF-8 file filename, create new dictionary
+	 *  @return dataset if success and null otherwise (the error is printed, not thrown)
+	 */
+	public static LDADataset readDataSet(String filename){
+		try {
+			BufferedReader reader = new BufferedReader(new InputStreamReader(
+					new FileInputStream(filename), "UTF-8"));
+			
+			LDADataset data = readDataSet(reader); // may itself be null if parsing failed
+			
+			reader.close();
+			return data;
+		}
+		catch (Exception e){
+			System.out.println("Read Dataset Error: " + e.getMessage());
+			e.printStackTrace();
+			return null;
+		}
+	}
+	
+	/**
+	 * read a dataset from a UTF-8 file with a preknown vocabulary
+	 * @param filename file from which we read dataset
+	 * @param dict the dictionary used as the dataset's global dictionary
+	 * @return dataset if success and null otherwise (the error is printed, not thrown)
+	 */
+	public static LDADataset readDataSet(String filename, Dictionary dict){
+		try {
+			BufferedReader reader = new BufferedReader(new InputStreamReader(
+					new FileInputStream(filename), "UTF-8"));
+			LDADataset data = readDataSet(reader, dict); // may itself be null if parsing failed
+			
+			reader.close();
+			return data;
+		}
+		catch (Exception e){
+			System.out.println("Read Dataset Error: " + e.getMessage());
+			e.printStackTrace();
+			return null;
+		}
+	}
+	
+	/**
+	 *  read a dataset from a stream, create new dictionary; the reader is not closed here
+	 *  @return dataset if success and null otherwise
+	 */
+	public static LDADataset readDataSet(BufferedReader reader){
+		try {
+			//first line: number of documents M
+			String line;
+			line = reader.readLine();
+			int M = Integer.parseInt(line);
+			
+			LDADataset data = new LDADataset(M);
+			for (int i = 0; i < M; ++i){
+				line = reader.readLine(); // one document per line; null once the stream is exhausted
+				
+				data.setDoc(line, i);
+			}
+			
+			return data;
+		}
+		catch (Exception e){
+			System.out.println("Read Dataset Error: " + e.getMessage());
+			e.printStackTrace();
+			return null;
+		}
+	}
+	
+	/**
+	 * read a dataset from a stream with respect to a specified dictionary; the reader is not closed here
+	 * @param reader stream from which we read dataset
+	 * @param dict the dictionary used as the dataset's global dictionary
+	 * @return dataset if success and null otherwise
+	 */
+	public static LDADataset readDataSet(BufferedReader reader, Dictionary dict){
+		try {
+			//first line: number of documents M
+			String line;
+			line = reader.readLine();
+			int M = Integer.parseInt(line);
+			System.out.println("NewM:" + M);
+			
+			LDADataset data = new LDADataset(M, dict);
+			for (int i = 0; i < M; ++i){
+				line = reader.readLine(); // one document per line; null once the stream is exhausted
+				
+				data.setDoc(line, i);
+			}
+			
+			return data;
+		}
+		catch (Exception e){
+			System.out.println("Read Dataset Error: " + e.getMessage());
+			e.printStackTrace();
+			return null;
+		}
+	}
+	
+	/**
+	 * build a dataset from an array of strings, create new dictionary
+	 * @param strs the documents, one array element per document
+	 * @return the dataset holding strs.length documents
+	 */
+	public static LDADataset readDataSet(String [] strs){
+		LDADataset data = new LDADataset(strs.length);
+		
+		for (int i = 0 ; i < strs.length; ++i){
+			data.setDoc(strs[i], i);
+		}
+		return data;
+	}
+	
+	/**
+	 * build a dataset from an array of strings with respect to a specified dictionary
+	 * @param strs the documents, one array element per document
+	 * @param dict the dictionary used as the dataset's global dictionary
+	 * @return the dataset holding strs.length documents
+	 */
+	public static LDADataset readDataSet(String [] strs, Dictionary dict){
+		//System.out.println("readDataset...");
+		LDADataset data = new LDADataset(strs.length, dict);
+		
+		for (int i = 0 ; i < strs.length; ++i){
+			//System.out.println("set doc " + i);
+			data.setDoc(strs[i], i);
+		}
+		return data;
+	}
+}
diff --git a/jgibblda/src/jgibblda/Model.java b/jgibblda/src/jgibblda/Model.java
new file mode 100644
index 0000000000000000000000000000000000000000..af5003b5af21bc53eb9f7ed82711612ed61fa43d
--- /dev/null
+++ b/jgibblda/src/jgibblda/Model.java
@@ -0,0 +1,716 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+package jgibblda;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.StringTokenizer;
+import java.util.Vector;
+
+public class Model {	
+	
+	//---------------------------------------------------------------
+	//	Class Variables
+	//---------------------------------------------------------------
+	
+	public static String tassignSuffix;	//suffix for topic assignment file
+	public static String thetaSuffix;		//suffix for theta (topic - document distribution) file
+	public static String phiSuffix;		//suffix for phi file (topic - word distribution) file
+	public static String othersSuffix; 	//suffix for containing other parameters
+	public static String twordsSuffix;		//suffix for file containing words-per-topics
+	
+	//---------------------------------------------------------------
+	//	Model Parameters and Variables
+	//---------------------------------------------------------------
+	
+	public String wordMapFile; 		//file that contain word to id map
+	public String trainlogFile; 	//training log file	
+	
+	public String dir;
+	public String dfile;
+	public String modelName;
+	public int modelStatus; 		//see Constants class for status of model
+	public LDADataset data;			// link to a dataset
+	
+	public int M; //dataset size (i.e., number of docs)
+	public int V; //vocabulary size
+	public int K; //number of topics
+	public double alpha, beta; //LDA  hyperparameters
+	public int niters; //number of Gibbs sampling iteration
+	public int liter; //the iteration at which the model was saved	
+	public int savestep; //saving period
+	public int twords; //print out top words per each topic
+	public int withrawdata;
+	
+	// Estimated/Inferenced parameters
+	public double [][] theta; //theta: document - topic distributions, size M x K
+	public double [][] phi; // phi: topic-word distributions, size K x V
+	
+	// Temp variables while sampling
+	public Vector<Integer> [] z; //topic assignments for words, size M x doc.size()
+	protected int [][] nw; //nw[i][j]: number of instances of word/term i assigned to topic j, size V x K
+	protected int [][] nd; //nd[i][j]: number of words in document i assigned to topic j, size M x K
+	protected int [] nwsum; //nwsum[j]: total number of words assigned to topic j, size K
+	protected int [] ndsum; //ndsum[i]: total number of words in document i, size M
+	
+	// temp variables for sampling
+	protected double [] p; 
+	
+	//---------------------------------------------------------------
+	//	Constructors
+	//---------------------------------------------------------------	
+
+	public Model(){ // starts from the defaults; nothing is allocated until one of the init* methods runs
+		setDefaultValues();	
+	}
+	
+	/**
+	 * Set default values for variables (savestep, twords and withrawdata are not assigned here)
+	 */
+	public void setDefaultValues(){
+		wordMapFile = "wordmap.txt";
+		trainlogFile = "trainlog.txt";
+		tassignSuffix = ".tassign"; // the five suffixes are static fields, i.e. shared by all Model instances
+		thetaSuffix = ".theta";
+		phiSuffix = ".phi";
+		othersSuffix = ".others";
+		twordsSuffix = ".twords";
+		
+		dir = "./";
+		dfile = "trndocs.dat";
+		modelName = "model-final";
+		modelStatus = Constants.MODEL_STATUS_UNKNOWN;		
+		
+		M = 0;
+		V = 0;
+		K = 100;
+		alpha = 50.0 / K; // evaluated with the K assigned on the previous line
+		beta = 0.1;
+		niters = 2000;
+		liter = 0;
+		
+		z = null;
+		nw = null;
+		nd = null;
+		nwsum = null;
+		ndsum = null;
+		theta = null;
+		phi = null;
+	}
+	
+	//---------------------------------------------------------------
+	//	I/O Methods
+	//---------------------------------------------------------------
+	/**
+	 * read the "key=value" lines of a .others file into alpha, beta, K, liter, V and M; false on any error
+	 */
+	protected boolean readOthersFile(String otherFile){
+		//open file <model>.others to read:
+		
+		try {
+			BufferedReader reader = new BufferedReader(new FileReader(otherFile));
+			String line;
+			while((line = reader.readLine()) != null){
+				StringTokenizer tknr = new StringTokenizer(line,"= \t\r\n");
+				
+				int count = tknr.countTokens();
+				if (count != 2) // anything that is not exactly "key value" is skipped
+					continue;
+				
+				String optstr = tknr.nextToken();
+				String optval = tknr.nextToken();
+				
+				if (optstr.equalsIgnoreCase("alpha")){
+					alpha = Double.parseDouble(optval);					
+				}
+				else if (optstr.equalsIgnoreCase("beta")){
+					beta = Double.parseDouble(optval);
+				}
+				else if (optstr.equalsIgnoreCase("ntopics")){
+					K = Integer.parseInt(optval);
+				}
+				else if (optstr.equalsIgnoreCase("liter") || optstr.equalsIgnoreCase("liters")){ // saveModelOthers writes "liters"
+					liter = Integer.parseInt(optval);
+				}
+				else if (optstr.equalsIgnoreCase("nwords")){
+					V = Integer.parseInt(optval);
+				}
+				else if (optstr.equalsIgnoreCase("ndocs")){
+					M = Integer.parseInt(optval);
+				}
+				else {
+					// any more?
+				}
+			}
+			
+			reader.close();
+		}
+		catch (Exception e){
+			System.out.println("Error while reading other file:" + e.getMessage());
+			e.printStackTrace();
+			return false;
+		}
+		return true;
+	}
+	
+	protected boolean readTAssignFile(String tassignFile){ // rebuilds data (M docs) and z from "word:topic" tokens, one doc per line
+		try {
+			int i,j;
+			BufferedReader reader = new BufferedReader(new InputStreamReader(
+					new FileInputStream(tassignFile), "UTF-8"));
+			
+			String line;
+			z = new Vector[M];			
+			data = new LDADataset(M);
+			data.V = V;			
+			for (i = 0; i < M; i++){
+				line = reader.readLine();
+				StringTokenizer tknr = new StringTokenizer(line, " \t\r\n");
+				
+				int length = tknr.countTokens();
+				
+				Vector<Integer> words = new Vector<Integer>();
+				Vector<Integer> topics = new Vector<Integer>();
+				
+				for (j = 0; j < length; j++){
+					String token = tknr.nextToken();
+					StringTokenizer tknr2 = new StringTokenizer(token, ":");
+					if (tknr2.countTokens() != 2){
+						System.out.println("Invalid word-topic assignment line\n");
+						reader.close(); // do not leave the file open when bailing out early
+						return false;
+					}
+					
+					words.add(Integer.parseInt(tknr2.nextToken()));
+					topics.add(Integer.parseInt(tknr2.nextToken()));
+				}//end for each topic assignment
+				
+				//allocate and add new document to the corpus
+				Document doc = new Document(words);
+				data.setDoc(doc, i);
+				
+				//assign values for z
+				z[i] = new Vector<Integer>();
+				for (j = 0; j < topics.size(); j++){
+					z[i].add(topics.get(j));
+				}
+				
+			}//end for each doc
+			
+			reader.close();
+		}
+		catch (Exception e){
+			System.out.println("Error while loading model: " + e.getMessage());
+			e.printStackTrace();
+			return false;
+		}
+		return true;
+	}
+	
+	/**
+	 * load saved model from dir: the .others file, then the .tassign file, then the word map; false as soon as one fails
+	 */
+	public boolean loadModel(){
+		if (!readOthersFile(dir + File.separator + modelName + othersSuffix))
+			return false;
+		
+		if (!readTAssignFile(dir + File.separator + modelName + tassignSuffix))
+			return false;
+		
+		// read dictionary
+		Dictionary dict = new Dictionary();
+		if (!dict.readWordMap(dir + File.separator + wordMapFile))
+			return false;
+			
+		data.localDict = dict; // data was created by readTAssignFile above
+		
+		return true;
+	}
+	
+	/**
+	 * Save word-topic assignments for this model: one line per document, tokens of the form "wordId:topic"
+	 */
+	public boolean saveModelTAssign(String filename){
+		int i, j;
+		
+		try{
+			BufferedWriter writer = new BufferedWriter(new FileWriter(filename));
+			
+			//write docs with topic assignments for words
+			for (i = 0; i < data.M; i++){
+				for (j = 0; j < data.docs[i].length; ++j){
+					writer.write(data.docs[i].words[j] + ":" + z[i].get(j) + " ");					
+				}
+				writer.write("\n");
+			}
+				
+			writer.close();
+		}
+		catch (Exception e){
+			System.out.println("Error while saving model tassign: " + e.getMessage());
+			e.printStackTrace();
+			return false;
+		}
+		return true;
+	}
+	
+	/**
+	 * Save theta (topic distribution) for this model: M lines of K space-separated values; false on any error
+	 */
+	public boolean saveModelTheta(String filename){
+		try{
+			BufferedWriter writer = new BufferedWriter(new FileWriter(filename));
+			for (int i = 0; i < M; i++){
+				for (int j = 0; j < K; j++){
+					writer.write(theta[i][j] + " ");
+				}
+				writer.write("\n");
+			}
+			writer.close();
+		}
+		catch (Exception e){
+			System.out.println("Error while saving topic distribution file for this model: " + e.getMessage());
+			e.printStackTrace();
+			return false;
+		}
+		return true;
+	}
+	
+	/**
+	 * Save word-topic distribution: K lines of V space-separated values; false on any error
+	 */
+	
+	public boolean saveModelPhi(String filename){
+		try {
+			BufferedWriter writer = new BufferedWriter(new FileWriter(filename));
+			
+			for (int i = 0; i < K; i++){
+				for (int j = 0; j < V; j++){
+					writer.write(phi[i][j] + " ");
+				}
+				writer.write("\n");
+			}
+			writer.close();
+		}
+		catch (Exception e){
+			System.out.println("Error while saving word-topic distribution:" + e.getMessage());
+			e.printStackTrace();
+			return false;
+		}
+		return true;
+	}
+	
+	/**
+	 * Save other information of this model as "key=value" lines; false on any error
+	 */
+	public boolean saveModelOthers(String filename){
+		try{
+			BufferedWriter writer = new BufferedWriter(new FileWriter(filename));
+			
+			writer.write("alpha=" + alpha + "\n");
+			writer.write("beta=" + beta + "\n");
+			writer.write("ntopics=" + K + "\n");
+			writer.write("ndocs=" + M + "\n");
+			writer.write("nwords=" + V + "\n");
+			writer.write("liters=" + liter + "\n"); // NOTE(review): key is spelled "liters"; the reader must recognise this spelling
+			
+			writer.close();
+		}
+		catch(Exception e){
+			System.out.println("Error while saving model others:" + e.getMessage());
+			e.printStackTrace();
+			return false;
+		}
+		return true;
+	}
+	
+	/**
+	 * Save the most likely words for each topic (UTF-8); note that twords is clamped to V as a side effect
+	 */
+	public boolean saveModelTwords(String filename){
+		try{
+			BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
+					new FileOutputStream(filename), "UTF-8"));
+			
+			if (twords > V){
+				twords = V;
+			}
+			
+			for (int k = 0; k < K; k++){
+				List<Pair> wordsProbsList = new ArrayList<Pair>(); 
+				for (int w = 0; w < V; w++){
+					Pair p = new Pair(w, phi[k][w], false); // false selects descending order in Pair.compareTo
+					
+					wordsProbsList.add(p);
+				}//end foreach word
+				
+				//print topic				
+				writer.write("Topic " + k + "th:\n");
+				Collections.sort(wordsProbsList);
+				
+				for (int i = 0; i < twords; i++){
+					if (data.localDict.contains((Integer)wordsProbsList.get(i).first)){ // ids missing from the dictionary are skipped
+						String word = data.localDict.getWord((Integer)wordsProbsList.get(i).first);
+						
+						writer.write("\t" + word + " " + wordsProbsList.get(i).second + "\n");
+					}
+				}
+			} //end foreach topic			
+						
+			writer.close();
+		}
+		catch(Exception e){
+			System.out.println("Error while saving model twords: " + e.getMessage());
+			e.printStackTrace();
+			return false;
+		}
+		return true;
+	}
+	
+	/**
+	 * Save model files (tassign, others, theta, phi and, if twords > 0, twords) under dir; stops at the first failure
+	 */
+	public boolean saveModel(String modelName){
+		if (!saveModelTAssign(dir + File.separator + modelName + tassignSuffix)){
+			return false;
+		}
+		
+		if (!saveModelOthers(dir + File.separator + modelName + othersSuffix)){			
+			return false;
+		}
+		
+		if (!saveModelTheta(dir + File.separator + modelName + thetaSuffix)){
+			return false;
+		}
+		
+		if (!saveModelPhi(dir + File.separator + modelName + phiSuffix)){
+			return false;
+		}
+		
+		if (twords > 0){
+			if (!saveModelTwords(dir + File.separator + modelName + twordsSuffix))
+				return false;
+		}
+		return true;
+	}
+	
+	//---------------------------------------------------------------
+	//	Init Methods
+	//---------------------------------------------------------------
+	/**
+	 * initialize the model from the command-line options; returns false only when option is null
+	 */
+	protected boolean init(LDACmdOption option){		
+		if (option == null)
+			return false;
+		
+		modelName = option.modelName;
+		K = option.K;
+		
+		alpha = option.alpha;
+		if (alpha < 0.0) // negative alpha means "not given"
+			alpha = 50.0 / K;
+		
+		if (option.beta >= 0) // otherwise beta keeps its current value
+			beta = option.beta;
+		
+		niters = option.niters;
+		
+		dir = option.dir;
+		if (dir.endsWith(File.separator)) // drop one trailing separator; paths are built as dir + File.separator + name
+			dir = dir.substring(0, dir.length() - 1);
+		
+		dfile = option.dfile;
+		twords = option.twords;
+		wordMapFile = option.wordMapFileName;
+		
+		return true;
+	}
+	
+	/**
+	 * Init parameters for estimation: reads dir/dfile, then assigns every word a random topic and builds the counts
+	 */
+	public boolean initNewModel(LDACmdOption option){
+		if (!init(option))
+			return false;
+		
+		int m, n, w, k;		
+		p = new double[K];		
+		
+		data = LDADataset.readDataSet(dir + File.separator + dfile);
+		if (data == null){
+			System.out.println("Fail to read training data!\n");
+			return false;
+		}
+		
+		//+ allocate memory and assign values for variables		
+		M = data.M;
+		V = data.V;
+		dir = option.dir; // NOTE(review): restores the raw option value, undoing the trailing-separator strip done in init()
+		savestep = option.savestep;
+		
+		// K: from command line or default value
+	    // alpha, beta: from command line or default values
+	    // niters, savestep: from command line or default values
+
+		nw = new int[V][K];
+		for (w = 0; w < V; w++){
+			for (k = 0; k < K; k++){
+				nw[w][k] = 0;
+			}
+		}
+		
+		nd = new int[M][K];
+		for (m = 0; m < M; m++){
+			for (k = 0; k < K; k++){
+				nd[m][k] = 0;
+			}
+		}
+		
+		nwsum = new int[K];
+		for (k = 0; k < K; k++){
+			nwsum[k] = 0;
+		}
+		
+		ndsum = new int[M];
+		for (m = 0; m < M; m++){
+			ndsum[m] = 0;
+		}
+		
+		z = new Vector[M];
+		for (m = 0; m < data.M; m++){
+			int N = data.docs[m].length;
+			z[m] = new Vector<Integer>();
+			
+			//initialize z with a uniformly random topic in [0, K) for every word
+			for (n = 0; n < N; n++){
+				int topic = (int)Math.floor(Math.random() * K);
+				z[m].add(topic);
+				
+				// number of instances of word assigned to topic j
+				nw[data.docs[m].words[n]][topic] += 1;
+				// number of words in document i assigned to topic j
+				nd[m][topic] += 1;
+				// total number of words assigned to topic j
+				nwsum[topic] += 1;
+			}
+			// total number of words in document i
+			ndsum[m] = N;
+		}
+		
+		theta = new double[M][K];		
+		phi = new double[K][V];
+		
+		return true;
+	}
+	
+	/**
+	 * Init parameters for inference; K, alpha and beta are copied from trnModel, overriding the command-line values
+	 * @param newData DataSet for which we do inference
+	 */
+	public boolean initNewModel(LDACmdOption option, LDADataset newData, Model trnModel){
+		if (!init(option))
+			return false;
+		
+		int m, n, w, k;
+		
+		K = trnModel.K;
+		alpha = trnModel.alpha;
+		beta = trnModel.beta;		
+		
+		p = new double[K];
+		System.out.println("K:" + K);
+		
+		data = newData;
+		
+		//+ allocate memory and assign values for variables		
+		M = data.M;
+		V = data.V;
+		dir = option.dir; // NOTE(review): restores the raw option value, undoing the trailing-separator strip done in init()
+		savestep = option.savestep;
+		System.out.println("M:" + M);
+		System.out.println("V:" + V);
+		
+		// K, alpha, beta: taken from trnModel (assigned above)
+	    // M, V: taken from the new dataset
+	    // niters, savestep: from command line or default values
+
+		nw = new int[V][K];
+		for (w = 0; w < V; w++){
+			for (k = 0; k < K; k++){
+				nw[w][k] = 0;
+			}
+		}
+		
+		nd = new int[M][K];
+		for (m = 0; m < M; m++){
+			for (k = 0; k < K; k++){
+				nd[m][k] = 0;
+			}
+		}
+		
+		nwsum = new int[K];
+		for (k = 0; k < K; k++){
+			nwsum[k] = 0;
+		}
+		
+		ndsum = new int[M];
+		for (m = 0; m < M; m++){
+			ndsum[m] = 0;
+		}
+		
+		z = new Vector[M];
+		for (m = 0; m < data.M; m++){
+			int N = data.docs[m].length;
+			z[m] = new Vector<Integer>();
+			
+			//initialize z with a uniformly random topic in [0, K) for every word
+			for (n = 0; n < N; n++){
+				int topic = (int)Math.floor(Math.random() * K);
+				z[m].add(topic);
+				
+				// number of instances of word assigned to topic j
+				nw[data.docs[m].words[n]][topic] += 1;
+				// number of words in document i assigned to topic j
+				nd[m][topic] += 1;
+				// total number of words assigned to topic j
+				nwsum[topic] += 1;
+			}
+			// total number of words in document i
+			ndsum[m] = N;
+		}
+		
+		theta = new double[M][K];		
+		phi = new double[K][V];
+		
+		return true;
+	}
+	
+	/**
+	 * Init parameters for inference
+	 * reading new dataset from dir/dfile, using trnModel's local dictionary as the global dictionary
+	 */
+	public boolean initNewModel(LDACmdOption option, Model trnModel){
+		if (!init(option)) // needed here so that dir and dfile are set before the dataset is read
+			return false;
+		
+		LDADataset dataset = LDADataset.readDataSet(dir + File.separator + dfile, trnModel.data.localDict);
+		if (dataset == null){
+			System.out.println("Fail to read dataset!\n");
+			return false;
+		}
+		
+		return initNewModel(option, dataset , trnModel);
+	}
+	
+	/**
+	 * init parameter for continue estimating or for later inference: loads a saved model and rebuilds the count arrays from z
+	 */
+	public boolean initEstimatedModel(LDACmdOption option){
+		if (!init(option))
+			return false;
+		
+		int m, n, w, k;
+		
+		// p is allocated only after loadModel(), because loading may replace K with the saved "ntopics" value
+		
+		// load model, i.e., read z and trndata
+		if (!loadModel()){
+			System.out.println("Fail to load word-topic assignment file of the model!\n");
+			return false;
+		}
+		p = new double[K];
+		System.out.println("Model loaded:");
+		System.out.println("\talpha:" + alpha);
+		System.out.println("\tbeta:" + beta);
+		System.out.println("\tM:" + M);
+		System.out.println("\tV:" + V);		
+		
+		nw = new int[V][K];
+		for (w = 0; w < V; w++){
+			for (k = 0; k < K; k++){
+				nw[w][k] = 0;
+			}
+		}
+		
+		nd = new int[M][K];
+		for (m = 0; m < M; m++){
+			for (k = 0; k < K; k++){
+				nd[m][k] = 0;
+			}
+		}
+		
+		nwsum = new int[K];
+	    for (k = 0; k < K; k++) {
+		nwsum[k] = 0;
+	    }
+	    
+	    ndsum = new int[M];
+	    for (m = 0; m < M; m++) {
+		ndsum[m] = 0;
+	    }
+	    
+	    for (m = 0; m < data.M; m++){
+	    	int N = data.docs[m].length;
+	    	
+	    	// assign values for nw, nd, nwsum, and ndsum
+	    	for (n = 0; n < N; n++){
+	    		w = data.docs[m].words[n];
+	    		int topic = (Integer)z[m].get(n);
+	    		
+	    		// number of instances of word i assigned to topic j
+	    		nw[w][topic] += 1;
+	    		// number of words in document i assigned to topic j
+	    		nd[m][topic] += 1;
+	    		// total number of words assigned to topic j
+	    		nwsum[topic] += 1;	    		
+	    	}
+	    	// total number of words in document i
+	    	ndsum[m] = N;
+	    }
+	    
+	    theta = new double[M][K];
+	    phi = new double[K][V];
+	    dir = option.dir;
+		savestep = option.savestep;
+	    
+		return true;
+	}
+	
+}
diff --git a/jgibblda/src/jgibblda/Pair.java b/jgibblda/src/jgibblda/Pair.java
new file mode 100644
index 0000000000000000000000000000000000000000..98402c894049ffa01d2d58f497812252b857f6ae
--- /dev/null
+++ b/jgibblda/src/jgibblda/Pair.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2007 by
+ * 
+ * 	Xuan-Hieu Phan
+ *	hieuxuan@ecei.tohoku.ac.jp or pxhieu@gmail.com
+ * 	Graduate School of Information Sciences
+ * 	Tohoku University
+ * 
+ *  Cam-Tu Nguyen
+ *  ncamtu@gmail.com
+ *  College of Technology
+ *  Vietnam National University, Hanoi
+ *
+ * JGibbsLDA is a free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * JGibbsLDA is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with JGibbsLDA; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+package jgibblda;
+
+import java.util.Comparator;
+
+/**
+ * A key/value pair that is ordered by its value ({@code second}) only;
+ * {@code first} is carried along and never takes part in comparisons.
+ *
+ * The sort direction is chosen by the static flag {@link #naturalOrder}:
+ * {@code true} orders pairs ascending by {@code second}, {@code false}
+ * (the default) orders them descending.
+ */
+public class Pair implements Comparable<Pair> {
+	/** Payload; not used for ordering. */
+	public Object first;
+	/** Value the ordering is based on. */
+	public Comparable second;
+	/**
+	 * Ordering switch. NOTE: this field is static, so it is shared by ALL
+	 * Pair instances; changing it (directly or through the three-argument
+	 * constructor) changes how every existing Pair compares.
+	 */
+	public static boolean naturalOrder = false;
+	
+	/**
+	 * Creates a pair without touching the current ordering flag.
+	 *
+	 * @param k the payload
+	 * @param v the value used for comparisons
+	 */
+	public Pair(Object k, Comparable v){
+		first = k;
+		second = v;		
+	}
+	
+	/**
+	 * Creates a pair and sets the global ordering flag as a side effect.
+	 *
+	 * @param k the payload
+	 * @param v the value used for comparisons
+	 * @param naturalOrder new value for the static {@link #naturalOrder} flag
+	 */
+	public Pair(Object k, Comparable v, boolean naturalOrder){
+		first = k;
+		second = v;
+		Pair.naturalOrder = naturalOrder; 
+	}
+	
+	/**
+	 * Compares two pairs by {@code second}, ascending or descending
+	 * depending on {@link #naturalOrder}.
+	 *
+	 * @param p the pair to compare with
+	 * @return a negative, zero or positive number per the Comparable contract
+	 */
+	@Override
+	@SuppressWarnings("unchecked") // 'second' is a raw Comparable (public field type)
+	public int compareTo(Pair p){
+		int natural = this.second.compareTo(p.second);
+		if (naturalOrder) {
+			return natural;
+		}
+		// Reverse the sign without negating: -Integer.MIN_VALUE overflows
+		// back to Integer.MIN_VALUE and would keep the wrong sign.
+		return Integer.compare(0, natural);
+	}
+}
+
diff --git a/ma-impl.sublime-workspace b/ma-impl.sublime-workspace
index 2fe3e4206bffc63a7a6bd2237de0613e53f32730..287a731c32279f5ff3cf4d629c457ed759e81e37 100644
--- a/ma-impl.sublime-workspace
+++ b/ma-impl.sublime-workspace
@@ -272,12 +272,11 @@
 	"buffers":
 	[
 		{
-			"contents": "1. import all new articles:\n  * generate article statistics\n  * into database\n  * into filebase, using filebase adapter of selected tm library\n2. recreate topic modeling, using selected tm library\n  * needs no articles in memory, works completely on files\n3. insert new topic model into database\n  * how to interpret tm result?\n  * how to relate tm result to articles?\n4. index new articles\n  * needs title, processed text and topics\n\nfilebase writes into single file according to tm library\nneeds index where articles are stored in the file\n\nall new articles are held in memory?\n\noriginal text is not needed except for stats (original text length) and for db for ui browsing",
+			"file": "/home/eike/Downloads/JGibbLDA-v.1.0/src/jgibblda/Constants.java",
 			"settings":
 			{
-				"buffer_size": 675,
-				"line_ending": "Unix",
-				"name": "1. import all new articles:"
+				"buffer_size": 1334,
+				"line_ending": "Windows"
 			}
 		}
 	],
@@ -464,6 +463,9 @@
 	],
 	"file_history":
 	[
+		"/home/eike/Downloads/JGibbLDA-v.1.0/models/casestudy-en/model-final.others",
+		"/home/eike/Downloads/JGibbLDA-v.1.0/models/casestudy-en/model-final.twords",
+		"/home/eike/Downloads/JGibbLDA-v.1.0/models/casestudy-en/newdocs.dat",
 		"/home/eike/Repositories/fu/ss15/ma/impl/vm/data/data.json",
 		"/home/eike/Repositories/fu/ss15/ma/impl/vm/data/test-2.json",
 		"/home/eike/Repositories/fu/ss15/ma/impl/vm/data/test-1.json",
@@ -906,56 +908,61 @@
 			[
 				{
 					"buffer": 0,
+					"file": "/home/eike/Downloads/JGibbLDA-v.1.0/src/jgibblda/Constants.java",
 					"semi_transient": false,
 					"settings":
 					{
-						"buffer_size": 675,
+						"buffer_size": 1334,
 						"regions":
 						{
 						},
 						"selection":
 						[
 							[
-								579,
-								579
+								1004,
+								1004
 							]
 						],
 						"settings":
 						{
 							"BracketHighlighterBusy": false,
-							"auto_name": "1. import all new articles:",
 							"bh_regions":
 							[
-								"bh_default",
-								"bh_default_center",
-								"bh_default_open",
-								"bh_default_close",
-								"bh_default_content",
-								"bh_square",
-								"bh_square_center",
-								"bh_square_open",
-								"bh_square_close",
-								"bh_square_content",
+								"bh_unmatched",
+								"bh_unmatched_center",
+								"bh_unmatched_open",
+								"bh_unmatched_close",
+								"bh_unmatched_content",
 								"bh_round",
 								"bh_round_center",
 								"bh_round_open",
 								"bh_round_close",
 								"bh_round_content",
+								"bh_square",
+								"bh_square_center",
+								"bh_square_open",
+								"bh_square_close",
+								"bh_square_content",
+								"bh_default",
+								"bh_default_center",
+								"bh_default_open",
+								"bh_default_close",
+								"bh_default_content",
 								"bh_c_define",
 								"bh_c_define_center",
 								"bh_c_define_open",
 								"bh_c_define_close",
 								"bh_c_define_content",
-								"bh_single_quote",
-								"bh_single_quote_center",
-								"bh_single_quote_open",
-								"bh_single_quote_close",
-								"bh_single_quote_content",
-								"bh_double_quote",
-								"bh_double_quote_center",
-								"bh_double_quote_open",
-								"bh_double_quote_close",
-								"bh_double_quote_content",
+								"bh_curly",
+								"bh_curly_center",
+								"bh_curly_open",
+								"bh_curly_close",
+								"bh_curly_content",
+								"bh_regex",
+								"bh_regex_center",
+								"bh_regex_open",
+								"bh_regex_close",
+								"bh_regex_content",
 								"bh_angle",
 								"bh_angle_center",
 								"bh_angle_open",
@@ -966,24 +973,19 @@
 								"bh_tag_open",
 								"bh_tag_close",
 								"bh_tag_content",
-								"bh_regex",
-								"bh_regex_center",
-								"bh_regex_open",
-								"bh_regex_close",
-								"bh_regex_content",
-								"bh_unmatched",
-								"bh_unmatched_center",
-								"bh_unmatched_open",
-								"bh_unmatched_close",
-								"bh_unmatched_content",
-								"bh_curly",
-								"bh_curly_center",
-								"bh_curly_open",
-								"bh_curly_close",
-								"bh_curly_content"
+								"bh_single_quote",
+								"bh_single_quote_center",
+								"bh_single_quote_open",
+								"bh_single_quote_close",
+								"bh_single_quote_content",
+								"bh_double_quote",
+								"bh_double_quote_center",
+								"bh_double_quote_open",
+								"bh_double_quote_close",
+								"bh_double_quote_content"
 							],
 							"incomplete_sync": null,
-							"syntax": "Packages/Text/Plain text.tmLanguage"
+							"syntax": "Packages/Java/Java.sublime-syntax"
 						},
 						"translation.x": 0.0,
 						"translation.y": 0.0,
diff --git a/vipra-cmd/.classpath b/vipra-cmd/.classpath
index f83898bb2bbe1b2b46a8e71244628070dd23373f..40970bd0b82a9de07acfa8546c944293fd7552dd 100644
--- a/vipra-cmd/.classpath
+++ b/vipra-cmd/.classpath
@@ -34,5 +34,6 @@
 			<attribute name="maven.pomderived" value="true"/>
 		</attributes>
 	</classpathentry>
+	<classpathentry combineaccessrules="false" kind="src" path="/JGibbLDA"/>
 	<classpathentry kind="output" path="target/classes"/>
 </classpath>
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java
index 38a2d8ca4b6c222134e24ca240104a2500543e7f..18f6a5d5b96133fd871dc72bb1501c888876bddd 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/JGibbFilebase.java
@@ -1,16 +1,14 @@
 package de.vipra.cmd.file;
 
-import java.io.BufferedOutputStream;
 import java.io.File;
-import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.RandomAccessFile;
 import java.util.ArrayList;
 import java.util.List;
 
 import de.vipra.cmd.ex.FilebaseException;
 import de.vipra.cmd.model.Article;
-import de.vipra.util.Constants;
-import de.vipra.util.FileUtils;
+import de.vipra.util.StringUtils;
 import de.vipra.util.ex.NotImplementedException;
 
 public class JGibbFilebase extends Filebase {
@@ -43,14 +41,32 @@ public class JGibbFilebase extends Filebase {
 
 	@Override
 	public void write() throws IOException {
-		int lineCount = FileUtils.countLines(dataFile) + articles.size();
+		boolean exists = dataFile.exists();
 
-		BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream(dataFile, true));
+		RandomAccessFile raf = new RandomAccessFile(dataFile, "rw");
+
+		// read count if exists
+		int articleCount = 0;
+		if (exists) {
+			byte[] count = new byte[10];
+			raf.read(count);
+			articleCount = Integer.parseInt(new String(count));
+		}
+
+		// write count
+		// count is padded to allow replacement with bigger numbers
+		articleCount += articles.size();
+		raf.seek(0);
+		raf.writeBytes(StringUtils.padNumber(articleCount));
+
+		// write articles
+		raf.seek(raf.length());
 		for (Article a : articles) {
-			bw.write(a.getProcessedText().getText().getBytes(Constants.FB_ENCODING));
-			bw.write(System.lineSeparator().getBytes(Constants.FB_ENCODING));
+			raf.writeBytes(System.lineSeparator());
+			raf.writeBytes(a.getProcessedText().getText());
 		}
-		bw.close();
+
+		raf.close();
 	}
 
 }
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java
index 76f29e7801b2f7abdb622d44695472be9024e684..1d1f3e9a00ba76d0aeaa27b67995a2a9f38981ad 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/JGibbLDAAnalyzer.java
@@ -1,17 +1,38 @@
 package de.vipra.cmd.lda;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import de.vipra.cmd.ex.LDAAnalyzerException;
+import de.vipra.util.Config;
+import jgibblda.Inferencer;
+import jgibblda.LDACmdOption;
 
+/**
+ * {@link LDAAnalyzer} variant that holds the JGibbLDA option and inferencer
+ * objects. {@link #init(Config)} and {@link #analyze()} are still empty, so
+ * the class does not perform any analysis yet.
+ */
 public class JGibbLDAAnalyzer extends LDAAnalyzer {
 
+	public static final Logger log = LoggerFactory.getLogger(JGibbLDAAnalyzer.class);
+
+	private final LDACmdOption ldaOption;
+	private final Inferencer inferencer;
+
+	/**
+	 * Passes the display name "JGibb Analyzer" to the base class and creates
+	 * default-constructed JGibbLDA option and inferencer objects.
+	 */
+	protected JGibbLDAAnalyzer() {
+		super("JGibb Analyzer");
+		this.ldaOption = new LDACmdOption();
+		this.inferencer = new Inferencer();
+	}
+
+	/**
+	 * Placeholder: the try block is empty, so {@code config} is not read yet.
+	 *
+	 * @throws LDAAnalyzerException wrapping any exception raised in the try block
+	 */
 	@Override
-	public String getName() {
-		return "JGibb Analyzer";
+	public void init(Config config) throws LDAAnalyzerException {
+		try {
+
+		} catch (Exception e) {
+			throw new LDAAnalyzerException(e);
+		}
 	}
 
+	/** Placeholder: the body is empty, nothing is analyzed yet. */
 	@Override
 	public void analyze() throws LDAAnalyzerException {
-		// TODO Auto-generated method stub
+
 	}
 
 }
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java
index 6cc4e62dd1892d42e24eb7aaf2214dff80e38c8a..b7285a722466489bfca80dd5a61111c7634540c4 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LDAAnalyzer.java
@@ -7,19 +7,34 @@ import de.vipra.util.Config.Key;
 
+/**
+ * Base class of the LDA analyzers. Stores the analyzer's display name,
+ * declares the {@link #init(Config)} / {@link #analyze()} hooks and offers
+ * {@link #getAnalyzer(Config)} as factory.
+ */
 public abstract class LDAAnalyzer {
 
-	public abstract String getName();
+	private final String name;
+
+	/**
+	 * @param name display name returned by {@link #getName()}
+	 */
+	protected LDAAnalyzer(String name) {
+		this.name = name;
+	}
+
+	/**
+	 * @return the name given to the constructor
+	 */
+	public String getName() {
+		return name;
+	}
+
+	/**
+	 * Initialization hook; {@link #getAnalyzer(Config)} calls it once, right
+	 * after constructing the analyzer.
+	 *
+	 * @param config the configuration the factory was called with
+	 * @throws LDAAnalyzerException if the subclass cannot be initialized
+	 */
+	public abstract void init(Config config) throws LDAAnalyzerException;
 
 	public abstract void analyze() throws LDAAnalyzerException;
 
-	public static LDAAnalyzer getAnalyzer(Config config) {
+	/**
+	 * Creates the analyzer selected by the {@code Key.ANALYZER} setting and
+	 * initializes it with {@code config}. {@code LDAC} yields an
+	 * {@link LdacLDAAnalyzer}; {@code JGIBB}, {@code DEFAULT} and every other
+	 * value yield a {@link JGibbLDAAnalyzer}.
+	 *
+	 * @param config configuration to read the analyzer setting from
+	 * @return the initialized analyzer
+	 * @throws LDAAnalyzerException if the analyzer's {@code init} fails
+	 */
+	public static LDAAnalyzer getAnalyzer(Config config) throws LDAAnalyzerException {
+		LDAAnalyzer analyzer = null;
 		switch (Constants.Analyzer.fromString(config.getString(Key.ANALYZER))) {
 			case LDAC:
-				return new LdacLDAAnalyzer();
+				analyzer = new LdacLDAAnalyzer();
+				break;
 			case JGIBB:
 			case DEFAULT:
 			default:
-				return new JGibbLDAAnalyzer();
+				analyzer = new JGibbLDAAnalyzer();
+				break;
 		}
+		analyzer.init(config);
+		return analyzer;
 	}
 
 }
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LdacLDAAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LdacLDAAnalyzer.java
index c8431e44d602c452b4d2db8dd798cc18789b3e54..0b65ea364aeb314a59ea8f6b61bdd8d20627219b 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/lda/LdacLDAAnalyzer.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/lda/LdacLDAAnalyzer.java
@@ -1,12 +1,18 @@
 package de.vipra.cmd.lda;
 
 import de.vipra.cmd.ex.LDAAnalyzerException;
+import de.vipra.util.Config;
 
 public class LdacLDAAnalyzer extends LDAAnalyzer {
 
+	protected LdacLDAAnalyzer() {
+		super("lda-c Analyzer");
+	}
+
 	@Override
-	public String getName() {
-		return "lda-c Analyzer";
+	public void init(Config config) throws LDAAnalyzerException {
+		// TODO Auto-generated method stub
+
 	}
 
 	@Override
diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java
index a9bc04dc50f46b19d25972c1ba5601aa2452fc74..f6b20130aed5c92e0c0205b318a371f0c9b2e14d 100644
--- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java
+++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java
@@ -164,7 +164,7 @@ public class ImportCommand implements Command {
 			analyzer.analyze();
 			long durAnalyze = timer.lap();
 
-			// write file index
+			// write filebase
 			filebase.close();
 			long durIndex = timer.lap();
 
diff --git a/vipra-util/src/main/java/de/vipra/util/Config.java b/vipra-util/src/main/java/de/vipra/util/Config.java
index 6a9a47fee15b121a1253566c396c3b4f30490aba..d4abb9fbff65a3fd8a76562820572fa8ffaecd7f 100644
--- a/vipra-util/src/main/java/de/vipra/util/Config.java
+++ b/vipra-util/src/main/java/de/vipra/util/Config.java
@@ -17,7 +17,7 @@ public class Config {
 		DBPORT("db.port", Constants.DEFAULT_PORT),
 		DBNAME("db.name", Constants.DEFAULT_DB),
 		DATADIR("fs.datadir", null),
-		PREPROCESSOR("an.preprocessor", Constants.DEFAULT_PREPROCESSOR.name),
+		PREPROCESSOR("an.preprocessor", Constants.DEFAULT_PROCESSOR.name),
 		ANALYZER("an.analyzer", Constants.DEFAULT_ANALYZER.name),
 		STOPWORDS("an.stopwords", "");
 
diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java
index f88b43093558b650b05c1e7c8b6b34a6ba142462..34e8043a88f1b12981d7397706cf569b86a2f070 100644
--- a/vipra-util/src/main/java/de/vipra/util/Constants.java
+++ b/vipra-util/src/main/java/de/vipra/util/Constants.java
@@ -22,7 +22,7 @@ public class Constants {
 
 	public static final String CHARS_DISALLOWED = "[^a-zA-Z0-9]";
 
-	public static final Processor DEFAULT_PREPROCESSOR = Processor.LUCENE;
+	public static final Processor DEFAULT_PROCESSOR = Processor.LUCENE;
 	public static final Analyzer DEFAULT_ANALYZER = Analyzer.JGIBB;
 
 	public static final List<String> STOPWORDS = Arrays.asList("a", "an", "and", "are", "as", "at", "be", "but", "by",
diff --git a/vipra-util/src/main/java/de/vipra/util/FileUtils.java b/vipra-util/src/main/java/de/vipra/util/FileUtils.java
index a548f9c4895d1c6bd10b8946e7984e1c597a59b5..935541e22e9b512e5706ec89d67452449470b4cb 100644
--- a/vipra-util/src/main/java/de/vipra/util/FileUtils.java
+++ b/vipra-util/src/main/java/de/vipra/util/FileUtils.java
@@ -24,6 +24,9 @@ public class FileUtils extends org.apache.commons.io.FileUtils {
 	}
 
 	public static int countLines(File file) throws IOException {
+		if (!file.exists()) {
+			return 0;
+		}
 		InputStream is = new BufferedInputStream(new FileInputStream(file));
 		try {
 			byte[] c = new byte[1024];
diff --git a/vipra-util/src/main/java/de/vipra/util/StringUtils.java b/vipra-util/src/main/java/de/vipra/util/StringUtils.java
index 15f96f9f026099669bb724194f4fc88475c008cf..ffac45c98a423aba0a48258b14fe6b0ba165206e 100644
--- a/vipra-util/src/main/java/de/vipra/util/StringUtils.java
+++ b/vipra-util/src/main/java/de/vipra/util/StringUtils.java
@@ -70,4 +70,12 @@ public class StringUtils {
 		return StringUtils.join(parts);
 	}
 
+	/**
+	 * Formats a number as a decimal string that is left-padded with zeros to
+	 * a width of at least 10 characters, e.g. {@code 42 -> "0000000042"}.
+	 * For negative numbers the sign comes first and counts towards the width
+	 * ({@code -5 -> "-000000005"}), so the result can always be read back
+	 * with {@link Integer#parseInt(String)}.
+	 *
+	 * @param lineCount the number to format
+	 * @return the zero-padded decimal representation
+	 */
+	public static String padNumber(int lineCount) {
+		// Locale.ROOT keeps the digits ASCII regardless of the default locale
+		return String.format(java.util.Locale.ROOT, "%010d", lineCount);
+	}
+
 }