From 234a47e95290fce80116b0ce5b26c1d1bfcac103 Mon Sep 17 00:00:00 2001 From: Eike Cochu <eike@cochu.com> Date: Mon, 25 Jan 2016 22:03:22 +0100 Subject: [PATCH] updated word import all words of each article will be imported, instead of only words of topics updated ui, still needs pagination --- ma-impl.sublime-workspace | 119 +++++++++++++++++- .../de/vipra/cmd/option/ImportCommand.java | 11 ++ .../java/de/vipra/cmd/text/ProcessedText.java | 12 +- .../java/de/vipra/cmd/text/Processor.java | 2 +- .../vipra/rest/resource/ImportResource.java | 4 +- vipra-ui/app/routes/articles/index.js | 3 +- vipra-ui/app/routes/index.js | 22 +--- vipra-ui/app/routes/words/index.js | 2 +- vipra-ui/app/styles/app.scss | 4 + vipra-ui/app/templates/words/index.hbs | 7 +- .../main/java/de/vipra/util/Constants.java | 7 +- .../src/main/java/de/vipra/util/WordMap.java | 10 +- .../java/de/vipra/util/model/Article.java | 2 +- .../java/de/vipra/util/model/ArticleFull.java | 2 +- .../de/vipra/util/model/ArticleStats.java | 18 +-- .../main/java/de/vipra/util/model/Topic.java | 2 +- .../main/java/de/vipra/util/model/Word.java | 33 ++++- .../vipra/util/service/DatabaseService.java | 2 +- 18 files changed, 206 insertions(+), 56 deletions(-) diff --git a/ma-impl.sublime-workspace b/ma-impl.sublime-workspace index ba2c4d36..b05d059f 100644 --- a/ma-impl.sublime-workspace +++ b/ma-impl.sublime-workspace @@ -279,6 +279,31 @@ }, "buffers": [ + { + "file": "vipra-ui/app/routes/index.js", + "settings": + { + "buffer_size": 666, + "line_ending": "Unix" + } + }, + { + "file": "vipra-ui/app/templates/index.hbs", + "settings": + { + "buffer_size": 1247, + "line_ending": "Unix" + } + }, + { + "contents": "pagination\nindexing + search\ntopic network visualization\n\ndynamic nmf python\n\nuser:\naccenture", + "settings": + { + "buffer_size": 93, + "line_ending": "Unix", + "name": "pagination" + } + } ], "build_system": "", "build_system_choices": @@ -460,10 +485,8 @@ "/home/eike/repos/master/ma-impl", "/home/eike/repos/master/ma-impl/vipra-ui", "/home/eike/repos/master/ma-impl/vipra-ui/app", - "/home/eike/repos/master/ma-impl/vipra-ui/app/models", + "/home/eike/repos/master/ma-impl/vipra-ui/app/helpers", "/home/eike/repos/master/ma-impl/vipra-ui/app/routes", - "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/articles", - "/home/eike/repos/master/ma-impl/vipra-ui/app/styles", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates" ], "file_history": @@ -919,8 +942,98 @@ "groups": [ { + "selected": 2, "sheets": [ + { + "buffer": 0, + "file": "vipra-ui/app/routes/index.js", + "semi_transient": false, + "settings": + { + "buffer_size": 666, + "regions": + { + }, + "selection": + [ + [ + 333, + 333 + ] + ], + "settings": + { + "syntax": "Packages/JavaScriptNext - ES6 Syntax/JavaScriptNext.tmLanguage", + "tab_size": 2, + "translate_tabs_to_spaces": true + }, + "translation.x": 0.0, + "translation.y": 0.0, + "zoom_level": 1.0 + }, + "stack_index": 1, + "type": "text" + }, + { + "buffer": 1, + "file": "vipra-ui/app/templates/index.hbs", + "semi_transient": true, + "settings": + { + "buffer_size": 1247, + "regions": + { + }, + "selection": + [ + [ + 478, + 478 + ] + ], + "settings": + { + "syntax": "Packages/Handlebars/grammars/Handlebars.tmLanguage", + "tab_size": 2, + "translate_tabs_to_spaces": true + }, + "translation.x": 0.0, + "translation.y": 0.0, + "zoom_level": 1.0 + }, + "stack_index": 2, + "type": "text" + }, + { + "buffer": 2, + "semi_transient": false, + "settings": + { + "buffer_size": 93, + "regions": + { + }, + "selection": + [ + [ + 83, + 83 + ] + ], + "settings": + { + "auto_name": "pagination", + "default_dir": "/home/eike/repos/master/ma-impl", + "syntax": "Packages/Text/Plain text.tmLanguage" + }, + "translation.x": 0.0, + "translation.y": 0.0, + "zoom_level": 1.0 + }, + "stack_index": 0, + "type": "text" + } ] } ], diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index 7cafd4a3..0d88b358 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -121,6 +121,10 @@ public class ImportCommand implements Command { article.setProcessedText(processedText); article.setStats(articleStats); article = dbArticles.createSingle(article); + + // add words + for(String word : processedText.getWords()) + wordMap.add(word); // add article to filebase filebase.add(article); @@ -267,6 +271,13 @@ public class ImportCommand implements Command { importOp.setWords(importedWords); timer.lap("saving topic refs"); + /* + * save words + */ + out.info("saving words"); + wordMap.create(); + timer.lap("saving words"); + /* * save import information */ diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java index 10a4d01d..84d3b6b0 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java @@ -1,15 +1,21 @@ package de.vipra.cmd.text; +import de.vipra.util.StringUtils; + public final class ProcessedText { - private final String text; + private final String[] words; public ProcessedText(String text) { - this.text = text; + this.words = text.split("\\s+"); + } + + public String[] getWords() { + return words; } public String getText() { - return text; + return StringUtils.join(words); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java index 58351b30..0d11b2af 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java @@ -39,7 +39,7 @@ public abstract class Processor { public static String clean(String in) { return in.replaceAll(Constants.REGEX_EMAIL, "").replaceAll(Constants.REGEX_URL, "") .replaceAll(Constants.REGEX_NUMBER, "").replaceAll(Constants.CHARS_DISALLOWED, "") - .replaceAll("\\s+", " ").trim(); + .replaceAll(Constants.REGEX_SINGLECHAR, "").replaceAll("\\s+", " ").trim(); } } diff --git a/vipra-rest/src/main/java/de/vipra/rest/resource/ImportResource.java b/vipra-rest/src/main/java/de/vipra/rest/resource/ImportResource.java index f4ba2086..0f144c86 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/resource/ImportResource.java +++ b/vipra-rest/src/main/java/de/vipra/rest/resource/ImportResource.java @@ -65,7 +65,7 @@ public class ImportResource { return Response.status(Response.Status.BAD_REQUEST).entity(res).build(); try { - List<Import> imports = service.getMultiple(skip, limit, sortBy, false, StringUtils.getFields(fields)); + List<Import> imports = service.getMultiple(skip, limit, sortBy, StringUtils.getFields(fields)); if ((skip != null && skip > 0) || (limit != null && limit > 0)) res.addMeta("total", service.count()); @@ -85,7 +85,7 @@ public class ImportResource { @Path("latest") public Response getLatestImport(@QueryParam("fields") String fields) { Wrapper<Import> res = new Wrapper<>(); - List<Import> latestImport = service.getMultiple(0, 1, "date", StringUtils.getFields(fields)); + List<Import> latestImport = service.getMultiple(0, 1, "date", false, StringUtils.getFields(fields)); if (latestImport == null || latestImport.size() != 1) { return res.noContent(); diff --git a/vipra-ui/app/routes/articles/index.js b/vipra-ui/app/routes/articles/index.js index 246a6e2b..e8b133dd 100644 --- a/vipra-ui/app/routes/articles/index.js +++ b/vipra-ui/app/routes/articles/index.js @@ -1,5 +1,6 @@ import Ember from 'ember'; +var pagelimit = 100; var chartData = []; var chartOptions = { chart: { @@ -33,7 +34,7 @@ export default Ember.Route.extend({ return Ember.RSVP.hash({ articles: this.store.query('article', { skip: 0, - limit: 100 + limit: pagelimit }), chartOptions: chartOptions, chartData: chartData diff --git a/vipra-ui/app/routes/index.js b/vipra-ui/app/routes/index.js index e928d1e7..2e701b60 100644 --- a/vipra-ui/app/routes/index.js +++ b/vipra-ui/app/routes/index.js @@ -2,22 +2,10 @@ import Ember from 'ember'; export default Ember.Route.extend({ model() { - return Ember.RSVP.hash({ - imports: this.store.findAll('import', { - skip: 0, - limit: 5 - }), - - latestimport: this.store.find('import', 'latest') - }); - }, - - afterModel(model) { - let articles = model.latestimport.get('articles'), - topics = model.latestimport.get('topics'), - words = model.latestimport.get('words'); - model.latestarticles = articles.slice(Math.max(articles.length - 5, 0)); - model.latesttopics = topics.slice(Math.max(topics.length - 5, 0)); - model.latestwords = words.slice(Math.max(words.length - 5, 0)); + return { + latestarticles: this.store.query('article', {limit: 5, sort: '-created'}), + latesttopics: this.store.query('topic', {limit: 5, sort: '-created'}), + latestwords: this.store.query('word', {limit: 5, sort: '-created'}), + }; } }); \ No newline at end of file diff --git a/vipra-ui/app/routes/words/index.js b/vipra-ui/app/routes/words/index.js index e83ce4c6..58b02ec3 100644 --- a/vipra-ui/app/routes/words/index.js +++ b/vipra-ui/app/routes/words/index.js @@ -3,7 +3,7 @@ import Ember from 'ember'; export default Ember.Route.extend({ model() { return Ember.RSVP.hash({ - words: this.store.findAll('word') + words: this.store.query('word', {sort:'word'}) }); } }); \ No newline at end of file diff --git a/vipra-ui/app/styles/app.scss b/vipra-ui/app/styles/app.scss index e072b7b5..f3263a02 100644 --- a/vipra-ui/app/styles/app.scss +++ b/vipra-ui/app/styles/app.scss @@ -43,6 +43,10 @@ body { text-overflow: ellipsis; } +.wordfilters a { + padding-right: 8px; +} + .navbar-default { .navbar-nav { &> .active { diff --git a/vipra-ui/app/templates/words/index.hbs b/vipra-ui/app/templates/words/index.hbs index c1de00df..548c404f 100644 --- a/vipra-ui/app/templates/words/index.hbs +++ b/vipra-ui/app/templates/words/index.hbs @@ -2,7 +2,12 @@ <br> <p> - {{model.words.length}} {{pluralize model.words.length 'Word'}} in the database: + {{model.words.length}} {{pluralize model.words.length 'Word'}} in the database +</p> + +<br> +<p class="wordfilters"> +<a href="#" {{action "wordfilter" "A"}}>A</a> <a href="#" {{action "wordfilter" "B"}}>B</a> <a href="#" {{action "wordfilter" "C"}}>C</a> <a href="#" {{action "wordfilter" "D"}}>D</a> <a href="#" {{action "wordfilter" "E"}}>E</a> <a href="#" {{action "wordfilter" "F"}}>F</a> <a href="#" {{action "wordfilter" "G"}}>G</a> <a href="#" {{action "wordfilter" "H"}}>H</a> <a href="#" {{action "wordfilter" "I"}}>I</a> <a href="#" {{action "wordfilter" "J"}}>J</a> <a href="#" {{action "wordfilter" "K"}}>K</a> <a href="#" {{action "wordfilter" "L"}}>L</a> <a href="#" {{action "wordfilter" "M"}}>M</a> <a href="#" {{action "wordfilter" "N"}}>N</a> <a href="#" {{action "wordfilter" "O"}}>O</a> <a href="#" {{action "wordfilter" "P"}}>P</a> <a href="#" {{action "wordfilter" "Q"}}>Q</a> <a href="#" {{action "wordfilter" "R"}}>R</a> <a href="#" {{action "wordfilter" "S"}}>S</a> <a href="#" {{action "wordfilter" "T"}}>T</a> <a href="#" {{action "wordfilter" "U"}}>U</a> <a href="#" {{action "wordfilter" "V"}}>V</a> <a href="#" {{action "wordfilter" "W"}}>W</a> <a href="#" {{action "wordfilter" "X"}}>X</a> <a href="#" {{action "wordfilter" "Y"}}>Y</a> <a href="#" {{action "wordfilter" "Z"}}>Z</a> </p> {{#each model.words as |word|}} diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index 7f3338f5..870b3a85 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -156,7 +156,12 @@ public class Constants { /** * Regular expressiong to find and remove numbers from text. */ - public static final String REGEX_NUMBER = "\\b[0-9]+\\b"; + public static final String REGEX_NUMBER = "\\b\\w*\\d+\\w*\\b"; + + /** + * Regular expression to find and remove single char words. + */ + public static final String REGEX_SINGLECHAR = "\\b\\w\\b"; /* * OTHER diff --git a/vipra-util/src/main/java/de/vipra/util/WordMap.java b/vipra-util/src/main/java/de/vipra/util/WordMap.java index 792f034e..8d11d931 100644 --- a/vipra-util/src/main/java/de/vipra/util/WordMap.java +++ b/vipra-util/src/main/java/de/vipra/util/WordMap.java @@ -20,7 +20,7 @@ public class WordMap { private final DatabaseService<Word, String> dbWords; private final Map<String, Word> wordMap; private final List<Word> newWords; - private boolean createNow = true; + private boolean createNow = false; public WordMap(DatabaseService<Word, String> dbWords) { this.dbWords = dbWords; @@ -32,8 +32,8 @@ public class WordMap { } public Word get(Object w) { - String strWord = w.toString(); - Word word = wordMap.get(strWord.toLowerCase()); + String strWord = w.toString().toLowerCase(); + Word word = wordMap.get(strWord); if (word == null) { word = new Word(strWord); createWord(word); @@ -42,6 +42,10 @@ public class WordMap { return word; } + public void add(Object w) { + get(w); + } + private Word createWord(Word word) { if (createNow) { try { diff --git a/vipra-util/src/main/java/de/vipra/util/model/Article.java b/vipra-util/src/main/java/de/vipra/util/model/Article.java index e127b6e3..892dcbad 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Article.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Article.java @@ -13,7 +13,7 @@ import de.vipra.util.an.JsonType; @SuppressWarnings("serial") @JsonType("article") @Entity(value = "articles", noClassnameStored = true) -@Indexes({ @Index("title"), @Index("date") }) +@Indexes({ @Index("title"), @Index("date"), @Index("-created") }) public class Article implements Model<ObjectId>, Serializable { @Id diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java index 6fe77f9c..26b42d38 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java @@ -27,7 +27,7 @@ import de.vipra.util.an.QueryIgnore; @SuppressWarnings("serial") @JsonType("article") @Entity(value = "articles", noClassnameStored = true) -@Indexes({ @Index("title"), @Index("date") }) +@Indexes({ @Index("title"), @Index("date"), @Index("-created") }) public class ArticleFull extends FileModel<ObjectId> implements Serializable { @Id diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java index db1b8ad3..4449f5b2 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java +++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java @@ -2,29 +2,17 @@ package de.vipra.util.model; import java.io.Serializable; -import org.bson.types.ObjectId; -import org.mongodb.morphia.annotations.Entity; -import org.mongodb.morphia.annotations.Id; +import org.mongodb.morphia.annotations.Embedded; import de.vipra.util.WordMap; -@Entity +@Embedded public class ArticleStats implements Serializable { private static final long serialVersionUID = -4712841724990200627L; - @Id - private ObjectId id; private long wordCount; - public ObjectId getId() { - return id; - } - - public void setId(ObjectId id) { - this.id = id; - } - public long getWordCount() { return wordCount; } @@ -42,7 +30,7 @@ public class ArticleStats implements Serializable { @Override public String toString() { - return ArticleStats.class.getSimpleName() + "[id:" + id + ", wordCount:" + wordCount + "]"; + return ArticleStats.class.getSimpleName() + "[wordCount:" + wordCount + "]"; } } \ No newline at end of file diff --git a/vipra-util/src/main/java/de/vipra/util/model/Topic.java b/vipra-util/src/main/java/de/vipra/util/model/Topic.java index ca9ac7bb..894ba962 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Topic.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Topic.java @@ -14,7 +14,7 @@ import de.vipra.util.an.JsonType; @SuppressWarnings("serial") @JsonType("topic") @Entity(value = "topics", noClassnameStored = true) -@Indexes(@Index("name")) +@Indexes({@Index("name"), @Index("-created")}) public class Topic implements Model<ObjectId>, Serializable { @Id diff --git a/vipra-util/src/main/java/de/vipra/util/model/Word.java b/vipra-util/src/main/java/de/vipra/util/model/Word.java index e32e4e9c..954eb415 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Word.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Word.java @@ -1,20 +1,27 @@ package de.vipra.util.model; import java.io.Serializable; +import java.util.Date; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import org.mongodb.morphia.annotations.Index; +import org.mongodb.morphia.annotations.Indexes; import org.mongodb.morphia.annotations.PostLoad; import org.mongodb.morphia.annotations.PostPersist; +import org.mongodb.morphia.annotations.PrePersist; import org.mongodb.morphia.annotations.Transient; import com.fasterxml.jackson.annotation.JsonIgnore; import de.vipra.util.an.JsonType; +import de.vipra.util.an.JsonWrap; +import de.vipra.util.an.QueryIgnore; @SuppressWarnings("serial") @JsonType("word") @Entity(value = "words", noClassnameStored = true) +@Indexes(@Index("-created")) public class Word implements Model<String>, Serializable { /** @@ -32,6 +39,10 @@ public class Word implements Model<String>, Serializable { @JsonIgnore private String word; + @QueryIgnore(multi = true) + @JsonWrap("attributes") + private Date created; + /** * The created variable is a helper that marks non-persisted new words in * the import process. Each word with created = false will be saved before @@ -39,7 +50,7 @@ public class Word implements Model<String>, Serializable { */ @Transient @JsonIgnore - private boolean created = false; + private boolean isCreated = false; public Word() {} @@ -67,18 +78,32 @@ public class Word implements Model<String>, Serializable { } public boolean isCreated() { + return isCreated; + } + + public void setIsCreated(boolean created) { + this.isCreated = created; + } + + public Date getCreated() { return created; } - public void setCreated(boolean created) { + public void setCreated(Date created) { this.created = created; } @PostLoad @PostPersist - private void post() { + private void postLoadPersist() { this.id = word; - this.created = true; + this.isCreated = true; + } + + @PrePersist + private void prePersist() { + if (this.created == null) + this.created = new Date(); } } diff --git a/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java b/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java index fd0b6c1a..ab2b058b 100644 --- a/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java +++ b/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java @@ -71,7 +71,7 @@ public class DatabaseService<T extends Model<?>, U> implements Service<T, U, Dat q.order(sortBy); if (fields != null && fields.length > 0) q.retrievedFields(true, setMinus(fields, ignoredFieldsMulti)); - else if (!defaultIgnore && ignoredFieldsMulti.length > 0) + else if (defaultIgnore && ignoredFieldsMulti.length > 0) q.retrievedFields(false, ignoredFieldsMulti); List<T> list = q.asList(); return list; -- GitLab