From 4e3e7131f5306fe4d3ccfdb3f9c706c46f62fb19 Mon Sep 17 00:00:00 2001 From: Eike Cochu <eike@cochu.com> Date: Wed, 23 Dec 2015 21:11:46 +0100 Subject: [PATCH] updated data.json date format for vipra-cmd import tested vipra-cmd import and word stats --- ma-impl.sublime-workspace | 225 ++---------------- .../.settings/org.eclipse.jdt.core.prefs | 6 +- vipra-cmd/pom.xml | 2 +- .../main/java/de/vipra/cmd/model/Article.java | 12 +- .../de/vipra/cmd/option/ImportCommand.java | 3 +- vipra-cmd/src/main/resources/log4j2.xml | 1 - vipra-rest/pom.xml | 2 +- .../rest/provider/ObjectMapperProvider.java | 3 +- .../de/vipra/rest/serializer/JsonHelper.java | 6 +- .../de/vipra/rest/service/ArticleService.java | 2 + .../.settings/org.eclipse.jdt.core.prefs | 6 +- vipra-util/pom.xml | 2 +- .../main/java/de/vipra/util/Constants.java | 2 + .../java/de/vipra/util/model/Article.java | 22 +- .../de/vipra/util/model/ArticleStats.java | 39 ++- .../de/vipra/util/model/BsonDocument.java | 11 + .../main/java/de/vipra/util/model/Model.java | 2 +- .../de/vipra/util/model/TermFrequency.java | 40 +++- 18 files changed, 140 insertions(+), 246 deletions(-) create mode 100644 vipra-util/src/main/java/de/vipra/util/model/BsonDocument.java diff --git a/ma-impl.sublime-workspace b/ma-impl.sublime-workspace index 41ee8289..5c7ab397 100644 --- a/ma-impl.sublime-workspace +++ b/ma-impl.sublime-workspace @@ -271,22 +271,6 @@ }, "buffers": [ - { - "file": "Vagrantfile", - "settings": - { - "buffer_size": 955, - "line_ending": "Unix" - } - }, - { - "contents": "", - "settings": - { - "buffer_size": 0, - "line_ending": "Unix" - } - } ], "build_system": "", "build_system_choices": @@ -466,16 +450,17 @@ "expanded_folders": [ "/home/eike/Repositories/fu/ss15/ma/impl", - "/home/eike/Repositories/fu/ss15/ma/impl/vm" + "/home/eike/Repositories/fu/ss15/ma/impl/vm", + "/home/eike/Repositories/fu/ss15/ma/impl/vm/data" ], "file_history": [ + "/home/eike/Repositories/fu/ss15/ma/impl/vm/data/data.json", + "/home/eike/Repositories/fu/ss15/ma/impl/vm/data/test-2.json", + "/home/eike/Repositories/fu/ss15/ma/impl/vm/data/test-1.json", "/home/eike/.cache/.fr-rPaUI0/LICENSE.txt", "/home/eike/Repositories/fu/ss15/ma/impl/vipra-rest/src/main/resources/log4j2.xml", "/home/eike/Repositories/fu/ss15/ma/impl/vipra-config/log4j2.xml", - "/home/eike/Repositories/fu/ss15/ma/impl/vm/data/test-1.json", - "/home/eike/Repositories/fu/ss15/ma/impl/vm/data/test-2.json", - "/home/eike/Repositories/fu/ss15/ma/impl/vm/data/data.json", "/home/eike/Repositories/fu/ss15/ma/impl/vipra-cmd/test/test-1.json", "/home/eike/Repositories/fu/ss15/ma/impl/vipra-cmd/vipra-cmd.sh", "/home/eike/Repositories/fu/ss15/ma/impl/vipra-ui/app/components/article-list.js", @@ -634,6 +619,9 @@ "case_sensitive": false, "find_history": [ + "00:00Z\" },", + "{ \"$date\": ", + ".000+0000", "json-api", "\"id\":.*?\\n ", "\"id\":.*?\\n", @@ -758,17 +746,17 @@ "redirect_intended", "amount-in", "'", - "!important", - "images", - "url(\"../images/", - "fa-var" + "!important" ], "highlight": true, "in_selection": false, "preserve_case": false, - "regex": true, + "regex": false, "replace_history": [ + "00:00Z\",", + "", + "Z", "", "\"id\":", "000+0000\"", @@ -893,10 +881,7 @@ "survey", "SurveysController", "", - "/assets", - "Config::get", - "Lang::t", - "@__" + "/assets" ], "reverse": false, "show_context": true, @@ -907,190 +892,8 @@ "groups": [ { - "selected": 1, "sheets": [ - { - "buffer": 0, - "file": "Vagrantfile", - "semi_transient": true, - "settings": - { - "buffer_size": 955, - "regions": - { - }, - "selection": - [ - [ - 955, - 955 - ] - ], - "settings": - { - "BracketHighlighterBusy": false, - "bh_regions": - [ - "bh_square", - "bh_square_center", - "bh_square_open", - "bh_square_close", - "bh_square_content", - "bh_default", - "bh_default_center", - "bh_default_open", - "bh_default_close", - "bh_default_content", - "bh_single_quote", - "bh_single_quote_center", - "bh_single_quote_open", - "bh_single_quote_close", - "bh_single_quote_content", - "bh_round", - "bh_round_center", - "bh_round_open", - "bh_round_close", - "bh_round_content", - "bh_tag", - "bh_tag_center", - "bh_tag_open", - "bh_tag_close", - "bh_tag_content", - "bh_double_quote", - "bh_double_quote_center", - "bh_double_quote_open", - "bh_double_quote_close", - "bh_double_quote_content", - "bh_regex", - "bh_regex_center", - "bh_regex_open", - "bh_regex_close", - "bh_regex_content", - "bh_c_define", - "bh_c_define_center", - "bh_c_define_open", - "bh_c_define_close", - "bh_c_define_content", - "bh_curly", - "bh_curly_center", - "bh_curly_open", - "bh_curly_close", - "bh_curly_content", - "bh_angle", - "bh_angle_center", - "bh_angle_open", - "bh_angle_close", - "bh_angle_content", - "bh_unmatched", - "bh_unmatched_center", - "bh_unmatched_open", - "bh_unmatched_close", - "bh_unmatched_content" - ], - "incomplete_sync": null, - "remote_loading": false, - "synced": false, - "syntax": "Packages/Ruby/Ruby.sublime-syntax", - "tab_size": 2, - "translate_tabs_to_spaces": true - }, - "translation.x": 0.0, - "translation.y": 0.0, - "zoom_level": 1.0 - }, - "stack_index": 1, - "type": "text" - }, - { - "buffer": 1, - "semi_transient": false, - "settings": - { - "buffer_size": 0, - "regions": - { - }, - "selection": - [ - [ - 0, - 0 - ] - ], - "settings": - { - "BracketHighlighterBusy": false, - "auto_name": "", - "bh_regions": - [ - "bh_curly", - "bh_curly_center", - "bh_curly_open", - "bh_curly_close", - "bh_curly_content", - "bh_tag", - "bh_tag_center", - "bh_tag_open", - "bh_tag_close", - "bh_tag_content", - "bh_default", - "bh_default_center", - "bh_default_open", - "bh_default_close", - "bh_default_content", - "bh_single_quote", - "bh_single_quote_center", - "bh_single_quote_open", - "bh_single_quote_close", - "bh_single_quote_content", - "bh_unmatched", - "bh_unmatched_center", - "bh_unmatched_open", - "bh_unmatched_close", - "bh_unmatched_content", - "bh_c_define", - "bh_c_define_center", - "bh_c_define_open", - "bh_c_define_close", - "bh_c_define_content", - "bh_double_quote", - "bh_double_quote_center", - "bh_double_quote_open", - "bh_double_quote_close", - "bh_double_quote_content", - "bh_angle", - "bh_angle_center", - "bh_angle_open", - "bh_angle_close", - "bh_angle_content", - "bh_round", - "bh_round_center", - "bh_round_open", - "bh_round_close", - "bh_round_content", - "bh_square", - "bh_square_center", - "bh_square_open", - "bh_square_close", - "bh_square_content", - "bh_regex", - "bh_regex_center", - "bh_regex_open", - "bh_regex_close", - "bh_regex_content" - ], - "default_dir": "/home/eike/Repositories/fu/ss15/ma/impl", - "incomplete_sync": null, - "syntax": "Packages/Text/Plain text.tmLanguage" - }, - "translation.x": 0.0, - "translation.y": 0.0, - "zoom_level": 1.0 - }, - "stack_index": 0, - "type": "text" - } ] } ], diff --git a/vipra-cmd/.settings/org.eclipse.jdt.core.prefs b/vipra-cmd/.settings/org.eclipse.jdt.core.prefs index cf43e319..78a9b450 100644 --- a/vipra-cmd/.settings/org.eclipse.jdt.core.prefs +++ b/vipra-cmd/.settings/org.eclipse.jdt.core.prefs @@ -278,10 +278,10 @@ org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_decla org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert org.eclipse.jdt.core.formatter.join_lines_in_comments=true org.eclipse.jdt.core.formatter.join_wrapped_lines=true -org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=true +org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false -org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=true -org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=true +org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false +org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false org.eclipse.jdt.core.formatter.lineSplit=120 org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false diff --git a/vipra-cmd/pom.xml b/vipra-cmd/pom.xml index c237d34b..d07d0776 100644 --- a/vipra-cmd/pom.xml +++ b/vipra-cmd/pom.xml @@ -85,7 +85,7 @@ <dependency> <groupId>org.mongodb</groupId> <artifactId>mongodb-driver</artifactId> - <version>3.0.4</version> + <version>3.2.0</version> </dependency> <!-- Testing --> diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/model/Article.java b/vipra-cmd/src/main/java/de/vipra/cmd/model/Article.java index 14fe2ea3..a29161db 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/model/Article.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/model/Article.java @@ -5,10 +5,14 @@ import org.json.simple.JSONObject; public class Article extends de.vipra.util.model.Article { public void fromJSON(JSONObject obj) { - if (obj.containsKey("title")) setTitle(obj.get("title").toString()); - if (obj.containsKey("text")) setText(obj.get("text").toString()); - if (obj.containsKey("url")) setUrl(obj.get("url").toString()); - if (obj.containsKey("date")) setDate(obj.get("date").toString()); + if (obj.containsKey("title")) + setTitle(obj.get("title").toString()); + if (obj.containsKey("text")) + setText(obj.get("text").toString()); + if (obj.containsKey("url")) + setUrl(obj.get("url").toString()); + if (obj.containsKey("date")) + setDate(obj.get("date").toString()); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index 36fcf9c7..d31a7563 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -50,6 +50,7 @@ public class ImportCommand implements Command { } public static final Logger log = LoggerFactory.getLogger(ImportCommand.class); + public static final Logger out = LoggerFactory.getLogger("shellout"); private ArrayList<File> files = new ArrayList<>(); private JSONParser parser = new JSONParser(); @@ -126,7 +127,7 @@ public class ImportCommand implements Command { } void importArticle(JSONObject obj) throws ImportException { - log.info("importing \"" + StringUtils.ellipsize(obj.get("title").toString(), 80) + "\""); + out.info("importing \"" + StringUtils.ellipsize(obj.get("title").toString(), 80) + "\""); Article article = new Article(); article.fromJSON(obj); diff --git a/vipra-cmd/src/main/resources/log4j2.xml b/vipra-cmd/src/main/resources/log4j2.xml index 3a6e439c..c28b8da1 100644 --- a/vipra-cmd/src/main/resources/log4j2.xml +++ b/vipra-cmd/src/main/resources/log4j2.xml @@ -10,6 +10,5 @@ <AppenderRef ref="Console" /> </Root> <Logger name="shellout" level="ALL"/> - <Logger name="org.mongodb" level="ERROR"/> </Loggers> </Configuration> \ No newline at end of file diff --git a/vipra-rest/pom.xml b/vipra-rest/pom.xml index a7b384aa..0f1fc40d 100644 --- a/vipra-rest/pom.xml +++ b/vipra-rest/pom.xml @@ -83,7 +83,7 @@ <dependency> <groupId>org.mongodb</groupId> <artifactId>mongodb-driver</artifactId> - <version>3.0.4</version> + <version>3.2.0</version> </dependency> <!-- Testing --> diff --git a/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java b/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java index 4c8231ad..b748acc0 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java +++ b/vipra-rest/src/main/java/de/vipra/rest/provider/ObjectMapperProvider.java @@ -16,6 +16,7 @@ import com.fasterxml.jackson.databind.module.SimpleModule; import de.vipra.rest.model.Article; import de.vipra.rest.serializer.ArticleDeserializer; import de.vipra.rest.serializer.ArticleSerializer; +import de.vipra.util.Constants; @Provider public class ObjectMapperProvider implements ContextResolver<ObjectMapper> { @@ -41,7 +42,7 @@ public class ObjectMapperProvider implements ContextResolver<ObjectMapper> { final ObjectMapper mapper = new ObjectMapper(); mapper.enable(SerializationFeature.INDENT_OUTPUT); mapper.setSerializationInclusion(Include.NON_NULL); - mapper.setDateFormat(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'")); + mapper.setDateFormat(new SimpleDateFormat(Constants.DATETIME_FORMAT)); mapper.registerModule(module); return mapper; } diff --git a/vipra-rest/src/main/java/de/vipra/rest/serializer/JsonHelper.java b/vipra-rest/src/main/java/de/vipra/rest/serializer/JsonHelper.java index 10bfca3a..a15ff8ae 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/serializer/JsonHelper.java +++ b/vipra-rest/src/main/java/de/vipra/rest/serializer/JsonHelper.java @@ -7,6 +7,8 @@ import java.util.Date; import com.fasterxml.jackson.databind.JsonNode; +import de.vipra.util.Constants; + public class JsonHelper { public static <T> T get(JsonNode node, String name, T defaultValue, Class<T> type) { @@ -45,12 +47,12 @@ public class JsonHelper { } public static String dateToString(Date date) { - DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); + DateFormat df = new SimpleDateFormat(Constants.DATETIME_FORMAT); return df.format(date); } public static Date stringToDate(String source) { - DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); + DateFormat df = new SimpleDateFormat(Constants.DATETIME_FORMAT); try { return df.parse(source); } catch (ParseException e) { diff --git a/vipra-rest/src/main/java/de/vipra/rest/service/ArticleService.java b/vipra-rest/src/main/java/de/vipra/rest/service/ArticleService.java index 3ed4a615..139413e1 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/service/ArticleService.java +++ b/vipra-rest/src/main/java/de/vipra/rest/service/ArticleService.java @@ -26,7 +26,9 @@ public class ArticleService extends DatabaseService<Article> { public ArrayList<Article> getArticles(URI base, int skip, int limit, String sortBy) { ArrayList<Article> articles = super.getMultiple(skip, limit, sortBy); for (Article article : articles) { + // delete data for listing article.setText(null); + article.setStats(null); article.setBase(base); } return articles; diff --git a/vipra-util/.settings/org.eclipse.jdt.core.prefs b/vipra-util/.settings/org.eclipse.jdt.core.prefs index 57fd9ccb..0e1f9aa3 100644 --- a/vipra-util/.settings/org.eclipse.jdt.core.prefs +++ b/vipra-util/.settings/org.eclipse.jdt.core.prefs @@ -277,10 +277,10 @@ org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_decla org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert org.eclipse.jdt.core.formatter.join_lines_in_comments=true org.eclipse.jdt.core.formatter.join_wrapped_lines=true -org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=true +org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false -org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=true -org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=true +org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false +org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false org.eclipse.jdt.core.formatter.lineSplit=120 org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false diff --git a/vipra-util/pom.xml b/vipra-util/pom.xml index 59d8436d..f0bfa6d7 100644 --- a/vipra-util/pom.xml +++ b/vipra-util/pom.xml @@ -41,7 +41,7 @@ <dependency> <groupId>org.mongodb</groupId> <artifactId>mongodb-driver</artifactId> - <version>3.0.4</version> + <version>3.2.0</version> </dependency> <!-- ElasticSearch Adapter --> diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index 4aa09bf5..0655844e 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -14,6 +14,8 @@ public class Constants { public static final String DEFAULT_HOST = "localhost"; public static final int DEFAULT_PORT = 27017; public static final String DEFAULT_DB = "test"; + + public static final String DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss'Z'"; public static enum Collection { ARTICLES("articles"); diff --git a/vipra-util/src/main/java/de/vipra/util/model/Article.java b/vipra-util/src/main/java/de/vipra/util/model/Article.java index a121c216..8d149016 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Article.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Article.java @@ -8,9 +8,10 @@ import java.util.Date; import java.util.List; import org.bson.Document; -import org.bson.types.ObjectId; +import de.vipra.util.Constants; import de.vipra.util.FileUtils; +import de.vipra.util.MongoUtils; import de.vipra.util.StringUtils; public class Article extends Model { @@ -71,7 +72,7 @@ public class Article extends Model { } public void setDate(String date) { - SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); + SimpleDateFormat df = new SimpleDateFormat(Constants.DATETIME_FORMAT); try { setDate(df.parse(date)); } catch (ParseException e) {} @@ -84,11 +85,16 @@ public class Article extends Model { @Override public Document toDocument() { - Document doc = new Document("title", title).append("text", text).append("url", url).append("date", date); - if (getId() != null) { - doc.append("_id", new ObjectId(getId())); - } - return doc; + Document document = new Document(); + if (getId() != null) + document.put("_id", MongoUtils.objectId(getId())); + document.put("title", getTitle()); + document.put("text", getText()); + document.put("url", getUrl()); + document.put("date", getDate()); + if (getStats() != null) + document.put("stats", getStats().toDocument()); + return document; } @Override @@ -98,6 +104,8 @@ public class Article extends Model { setText(document.getString("text")); setUrl(document.getString("url")); setDate(document.getDate("date")); + if (document.containsKey("stats")) + setStats(new ArticleStats((Document) document.get("stats"))); } @Override diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java index dc06372e..2b4cc36c 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java +++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java @@ -3,12 +3,20 @@ package de.vipra.util.model; import java.util.HashMap; import java.util.Map; -public class ArticleStats { +import org.bson.Document; + +public class ArticleStats implements BsonDocument { private long wordCount; private long uniqueWordCount; private Map<String, TermFrequency> uniqueWords; + public ArticleStats() {} + + public ArticleStats(Document document) { + fromDocument(document); + } + public long getWordCount() { return wordCount; } @@ -61,4 +69,33 @@ public class ArticleStats { return stats; } + @Override + public Document toDocument() { + Document document = new Document(); + document.put("wordCount", getWordCount()); + document.put("uniqueWordCount", getUniqueWordCount()); + if (getUniqueWords() != null) { + Document uniqueWords = new Document(); + for (Map.Entry<String, TermFrequency> entry : getUniqueWords().entrySet()) { + uniqueWords.put(entry.getKey(), entry.getValue().toDocument()); + } + document.put("uniqueWords", uniqueWords); + } + return document; + } + + @Override + public void fromDocument(Document document) { + setWordCount(document.getLong("wordCount")); + setUniqueWordCount(document.getLong("uniqueWordCount")); + if (document.containsKey("uniqueWords")) { + Map<String, TermFrequency> uniqueWords = new HashMap<>(); + Document docUniqueWords = (Document) document.get("uniqueWords"); + for (Map.Entry<String, Object> entry : docUniqueWords.entrySet()) { + uniqueWords.put(entry.getKey(), new TermFrequency((Document) entry.getValue())); + } + setUniqueWords(uniqueWords); + } + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/model/BsonDocument.java b/vipra-util/src/main/java/de/vipra/util/model/BsonDocument.java new file mode 100644 index 00000000..0a606205 --- /dev/null +++ b/vipra-util/src/main/java/de/vipra/util/model/BsonDocument.java @@ -0,0 +1,11 @@ +package de.vipra.util.model; + +import org.bson.Document; + +public interface BsonDocument { + + Document toDocument(); + + void fromDocument(Document document); + +} diff --git a/vipra-util/src/main/java/de/vipra/util/model/Model.java b/vipra-util/src/main/java/de/vipra/util/model/Model.java index 264b7ab6..db4f5ec1 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Model.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Model.java @@ -10,7 +10,7 @@ import org.bson.Document; import de.vipra.util.Constants; -public abstract class Model { +public abstract class Model implements BsonDocument { private String id; diff --git a/vipra-util/src/main/java/de/vipra/util/model/TermFrequency.java b/vipra-util/src/main/java/de/vipra/util/model/TermFrequency.java index 8008d7ec..c97fa1e0 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TermFrequency.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TermFrequency.java @@ -1,10 +1,18 @@ package de.vipra.util.model; -public class TermFrequency { +import org.bson.Document; + +public class TermFrequency implements BsonDocument { private long termFrequency = 0; - private long normalizedTermFrequency = 0; - private long inverseDocumentFrequency = 0; + private double normalizedTermFrequency = 0; + private double inverseDocumentFrequency = 0; + + public TermFrequency() {} + + public TermFrequency(Document document) { + fromDocument(document); + } public long getTermFrequency() { return termFrequency; @@ -14,15 +22,15 @@ public class TermFrequency { this.termFrequency = termFrequency; } - public long getNormalizedTermFrequency() { + public double getNormalizedTermFrequency() { return normalizedTermFrequency; } - public void setNormalizedTermFrequency(long normalizedTermFrequency) { + public void setNormalizedTermFrequency(double normalizedTermFrequency) { this.normalizedTermFrequency = normalizedTermFrequency; } - public void normalizeTermFrequency(long max) { + public void normalizeTermFrequency(double max) { setNormalizedTermFrequency(getNormalizedTermFrequency() / max); } @@ -30,12 +38,28 @@ public class TermFrequency { setTermFrequency(getTermFrequency() + 1); } - public long getInverseDocumentFrequency() { + public double getInverseDocumentFrequency() { return inverseDocumentFrequency; } - public void setInverseDocumentFrequency(long inverseDocumentFrequency) { + public void setInverseDocumentFrequency(double inverseDocumentFrequency) { this.inverseDocumentFrequency = inverseDocumentFrequency; } + @Override + public Document toDocument() { + Document document = new Document(); + document.put("tf", getTermFrequency()); + document.put("ntf", getNormalizedTermFrequency()); + document.put("idf", getInverseDocumentFrequency()); + return document; + } + + @Override + public void fromDocument(Document document) { + setTermFrequency(document.getLong("tf")); + setNormalizedTermFrequency(document.getDouble("ntf")); + setInverseDocumentFrequency(document.getDouble("idf")); + } + } -- GitLab