diff --git a/ma-impl.sublime-workspace b/ma-impl.sublime-workspace index ba2c4d36b8a27cea6a5cfed545bf4fc1f0281df6..b05d059fba40f443eb1f50c917c7045b8f3ca918 100644 --- a/ma-impl.sublime-workspace +++ b/ma-impl.sublime-workspace @@ -279,6 +279,31 @@ }, "buffers": [ + { + "file": "vipra-ui/app/routes/index.js", + "settings": + { + "buffer_size": 666, + "line_ending": "Unix" + } + }, + { + "file": "vipra-ui/app/templates/index.hbs", + "settings": + { + "buffer_size": 1247, + "line_ending": "Unix" + } + }, + { + "contents": "pagination\nindexing + search\ntopic network visualization\n\ndynamic nmf python\n\nuser:\naccenture", + "settings": + { + "buffer_size": 93, + "line_ending": "Unix", + "name": "pagination" + } + } ], "build_system": "", "build_system_choices": @@ -460,10 +485,8 @@ "/home/eike/repos/master/ma-impl", "/home/eike/repos/master/ma-impl/vipra-ui", "/home/eike/repos/master/ma-impl/vipra-ui/app", - "/home/eike/repos/master/ma-impl/vipra-ui/app/models", + "/home/eike/repos/master/ma-impl/vipra-ui/app/helpers", "/home/eike/repos/master/ma-impl/vipra-ui/app/routes", - "/home/eike/repos/master/ma-impl/vipra-ui/app/routes/articles", - "/home/eike/repos/master/ma-impl/vipra-ui/app/styles", "/home/eike/repos/master/ma-impl/vipra-ui/app/templates" ], "file_history": @@ -919,8 +942,98 @@ "groups": [ { + "selected": 2, "sheets": [ + { + "buffer": 0, + "file": "vipra-ui/app/routes/index.js", + "semi_transient": false, + "settings": + { + "buffer_size": 666, + "regions": + { + }, + "selection": + [ + [ + 333, + 333 + ] + ], + "settings": + { + "syntax": "Packages/JavaScriptNext - ES6 Syntax/JavaScriptNext.tmLanguage", + "tab_size": 2, + "translate_tabs_to_spaces": true + }, + "translation.x": 0.0, + "translation.y": 0.0, + "zoom_level": 1.0 + }, + "stack_index": 1, + "type": "text" + }, + { + "buffer": 1, + "file": "vipra-ui/app/templates/index.hbs", + "semi_transient": true, + "settings": + { + "buffer_size": 1247, + "regions": + { + }, + "selection": + [ + [ + 478, + 478 + ] + ], + "settings": + { + "syntax": "Packages/Handlebars/grammars/Handlebars.tmLanguage", + "tab_size": 2, + "translate_tabs_to_spaces": true + }, + "translation.x": 0.0, + "translation.y": 0.0, + "zoom_level": 1.0 + }, + "stack_index": 2, + "type": "text" + }, + { + "buffer": 2, + "semi_transient": false, + "settings": + { + "buffer_size": 93, + "regions": + { + }, + "selection": + [ + [ + 83, + 83 + ] + ], + "settings": + { + "auto_name": "pagination", + "default_dir": "/home/eike/repos/master/ma-impl", + "syntax": "Packages/Text/Plain text.tmLanguage" + }, + "translation.x": 0.0, + "translation.y": 0.0, + "zoom_level": 1.0 + }, + "stack_index": 0, + "type": "text" + } ] } ], diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index 7cafd4a30ce714134e108d443f26f44567b90e6e..0d88b358dbcefba2c6b8243f2e77180681e63035 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -121,6 +121,10 @@ public class ImportCommand implements Command { article.setProcessedText(processedText); article.setStats(articleStats); article = dbArticles.createSingle(article); + + // add words + for(String word : processedText.getWords()) + wordMap.add(word); // add article to filebase filebase.add(article); @@ -267,6 +271,13 @@ public class ImportCommand implements Command { importOp.setWords(importedWords); timer.lap("saving topic refs"); + /* + * save words + */ + out.info("saving words"); + wordMap.create(); + timer.lap("saving words"); + /* * save import information */ diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java index 10a4d01dc114da8b37494707526b7ceceddeb382..84d3b6b01b5c1c88b6f0d56bf3cecd7556de22bb 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java @@ -1,15 +1,21 @@ package de.vipra.cmd.text; +import de.vipra.util.StringUtils; + public final class ProcessedText { - private final String text; + private final String[] words; public ProcessedText(String text) { - this.text = text; + this.words = text.split("\\s+"); + } + + public String[] getWords() { + return words; } public String getText() { - return text; + return StringUtils.join(words); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java index 58351b300276d5c4d6aeb287edc8f2041b210ef4..0d11b2af676800a206f8eadead1f979cabcd828f 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/Processor.java @@ -39,7 +39,7 @@ public abstract class Processor { public static String clean(String in) { return in.replaceAll(Constants.REGEX_EMAIL, "").replaceAll(Constants.REGEX_URL, "") .replaceAll(Constants.REGEX_NUMBER, "").replaceAll(Constants.CHARS_DISALLOWED, "") - .replaceAll("\\s+", " ").trim(); + .replaceAll(Constants.REGEX_SINGLECHAR, "").replaceAll("\\s+", " ").trim(); } } diff --git a/vipra-rest/src/main/java/de/vipra/rest/resource/ImportResource.java b/vipra-rest/src/main/java/de/vipra/rest/resource/ImportResource.java index f4ba2086f709bea495db4f68aab0cfd901b504a9..0f144c86ece30e21d3c481a8589725c1540225dc 100644 --- a/vipra-rest/src/main/java/de/vipra/rest/resource/ImportResource.java +++ b/vipra-rest/src/main/java/de/vipra/rest/resource/ImportResource.java @@ -65,7 +65,7 @@ public class ImportResource { return Response.status(Response.Status.BAD_REQUEST).entity(res).build(); try { - List<Import> imports = service.getMultiple(skip, limit, sortBy, false, StringUtils.getFields(fields)); + List<Import> imports = service.getMultiple(skip, limit, sortBy, StringUtils.getFields(fields)); if ((skip != null && skip > 0) || (limit != null && limit > 0)) res.addMeta("total", service.count()); @@ -85,7 +85,7 @@ public class ImportResource { @Path("latest") public Response getLatestImport(@QueryParam("fields") String fields) { Wrapper<Import> res = new Wrapper<>(); - List<Import> latestImport = service.getMultiple(0, 1, "date", StringUtils.getFields(fields)); + List<Import> latestImport = service.getMultiple(0, 1, "date", false, StringUtils.getFields(fields)); if (latestImport == null || latestImport.size() != 1) { return res.noContent(); diff --git a/vipra-ui/app/routes/articles/index.js b/vipra-ui/app/routes/articles/index.js index 246a6e2ba8445e85688fd335e5d6ba6cb76be112..e8b133dd62a27dd21f6f3f565eb832e78356032b 100644 --- a/vipra-ui/app/routes/articles/index.js +++ b/vipra-ui/app/routes/articles/index.js @@ -1,5 +1,6 @@ import Ember from 'ember'; +var pagelimit = 100; var chartData = []; var chartOptions = { chart: { @@ -33,7 +34,7 @@ export default Ember.Route.extend({ return Ember.RSVP.hash({ articles: this.store.query('article', { skip: 0, - limit: 100 + limit: pagelimit }), chartOptions: chartOptions, chartData: chartData diff --git a/vipra-ui/app/routes/index.js b/vipra-ui/app/routes/index.js index e928d1e7e739f892f3b78ef564d5c6721fdec47f..2e701b60f2b940458aab4ec826c146e69550775c 100644 --- a/vipra-ui/app/routes/index.js +++ b/vipra-ui/app/routes/index.js @@ -2,22 +2,10 @@ import Ember from 'ember'; export default Ember.Route.extend({ model() { - return Ember.RSVP.hash({ - imports: this.store.findAll('import', { - skip: 0, - limit: 5 - }), - - latestimport: this.store.find('import', 'latest') - }); - }, - - afterModel(model) { - let articles = model.latestimport.get('articles'), - topics = model.latestimport.get('topics'), - words = model.latestimport.get('words'); - model.latestarticles = articles.slice(Math.max(articles.length - 5, 0)); - model.latesttopics = topics.slice(Math.max(topics.length - 5, 0)); - model.latestwords = words.slice(Math.max(words.length - 5, 0)); + return { + latestarticles: this.store.query('article', {limit: 5, sort: '-created'}), + latesttopics: this.store.query('topic', {limit: 5, sort: '-created'}), + latestwords: this.store.query('word', {limit: 5, sort: '-created'}), + }; } }); \ No newline at end of file diff --git a/vipra-ui/app/routes/words/index.js b/vipra-ui/app/routes/words/index.js index e83ce4c68892db5333e925d5a8850336f47e8c55..58b02ec39f61170df7ea056f15dee6c5b2d0118a 100644 --- a/vipra-ui/app/routes/words/index.js +++ b/vipra-ui/app/routes/words/index.js @@ -3,7 +3,7 @@ import Ember from 'ember'; export default Ember.Route.extend({ model() { return Ember.RSVP.hash({ - words: this.store.findAll('word') + words: this.store.query('word', {sort:'word'}) }); } }); \ No newline at end of file diff --git a/vipra-ui/app/styles/app.scss b/vipra-ui/app/styles/app.scss index e072b7b504841013b33a4fd5ce2c83fde688aa3d..f3263a02b9dea1dbd7be8f7a2275fd4164ca7088 100644 --- a/vipra-ui/app/styles/app.scss +++ b/vipra-ui/app/styles/app.scss @@ -43,6 +43,10 @@ body { text-overflow: ellipsis; } +.wordfilters a { + padding-right: 8px; +} + .navbar-default { .navbar-nav { &> .active { diff --git a/vipra-ui/app/templates/words/index.hbs b/vipra-ui/app/templates/words/index.hbs index c1de00df849d88048544e85ef9aaa502e4ed7f42..548c404fb29d00640e8230d57ddffbbecb20d8be 100644 --- a/vipra-ui/app/templates/words/index.hbs +++ b/vipra-ui/app/templates/words/index.hbs @@ -2,7 +2,12 @@ <br> <p> - {{model.words.length}} {{pluralize model.words.length 'Word'}} in the database: + {{model.words.length}} {{pluralize model.words.length 'Word'}} in the database +</p> + +<br> +<p class="wordfilters"> +<a href="#" {{action "wordfilter" "A"}}>A</a> <a href="#" {{action "wordfilter" "B"}}>B</a> <a href="#" {{action "wordfilter" "C"}}>C</a> <a href="#" {{action "wordfilter" "D"}}>D</a> <a href="#" {{action "wordfilter" "E"}}>E</a> <a href="#" {{action "wordfilter" "F"}}>F</a> <a href="#" {{action "wordfilter" "G"}}>G</a> <a href="#" {{action "wordfilter" "H"}}>H</a> <a href="#" {{action "wordfilter" "I"}}>I</a> <a href="#" {{action "wordfilter" "J"}}>J</a> <a href="#" {{action "wordfilter" "K"}}>K</a> <a href="#" {{action "wordfilter" "L"}}>L</a> <a href="#" {{action "wordfilter" "M"}}>M</a> <a href="#" {{action "wordfilter" "N"}}>N</a> <a href="#" {{action "wordfilter" "O"}}>O</a> <a href="#" {{action "wordfilter" "P"}}>P</a> <a href="#" {{action "wordfilter" "Q"}}>Q</a> <a href="#" {{action "wordfilter" "R"}}>R</a> <a href="#" {{action "wordfilter" "S"}}>S</a> <a href="#" {{action "wordfilter" "T"}}>T</a> <a href="#" {{action "wordfilter" "U"}}>U</a> <a href="#" {{action "wordfilter" "V"}}>V</a> <a href="#" {{action "wordfilter" "W"}}>W</a> <a href="#" {{action "wordfilter" "X"}}>X</a> <a href="#" {{action "wordfilter" "Y"}}>Y</a> <a href="#" {{action "wordfilter" "Z"}}>Z</a> </p> {{#each model.words as |word|}} diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index 7f3338f5fef26da317c1806647c82c821a434994..870b3a8545e32a5c776104ec2d72808f45be0c22 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -156,7 +156,12 @@ public class Constants { /** * Regular expressiong to find and remove numbers from text. */ - public static final String REGEX_NUMBER = "\\b[0-9]+\\b"; + public static final String REGEX_NUMBER = "\\b\\w*\\d+\\w*\\b"; + + /** + * Regular expression to find and remove single char words. + */ + public static final String REGEX_SINGLECHAR = "\\b\\w\\b"; /* * OTHER diff --git a/vipra-util/src/main/java/de/vipra/util/WordMap.java b/vipra-util/src/main/java/de/vipra/util/WordMap.java index 792f034e673504e22a6d0457df2a7695562b10f7..8d11d931316e97d6c3f4d0a0181047b7e9acfb6b 100644 --- a/vipra-util/src/main/java/de/vipra/util/WordMap.java +++ b/vipra-util/src/main/java/de/vipra/util/WordMap.java @@ -20,7 +20,7 @@ public class WordMap { private final DatabaseService<Word, String> dbWords; private final Map<String, Word> wordMap; private final List<Word> newWords; - private boolean createNow = true; + private boolean createNow = false; public WordMap(DatabaseService<Word, String> dbWords) { this.dbWords = dbWords; @@ -32,8 +32,8 @@ public class WordMap { } public Word get(Object w) { - String strWord = w.toString(); - Word word = wordMap.get(strWord.toLowerCase()); + String strWord = w.toString().toLowerCase(); + Word word = wordMap.get(strWord); if (word == null) { word = new Word(strWord); createWord(word); @@ -42,6 +42,10 @@ public class WordMap { return word; } + public void add(Object w) { + get(w); + } + private Word createWord(Word word) { if (createNow) { try { diff --git a/vipra-util/src/main/java/de/vipra/util/model/Article.java b/vipra-util/src/main/java/de/vipra/util/model/Article.java index e127b6e3bee9352389dc0832de1d95f2b3aaf240..892dcbadb6d88b9acc5f94d1bd7c035f1caffa9d 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Article.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Article.java @@ -13,7 +13,7 @@ import de.vipra.util.an.JsonType; @SuppressWarnings("serial") @JsonType("article") @Entity(value = "articles", noClassnameStored = true) -@Indexes({ @Index("title"), @Index("date") }) +@Indexes({ @Index("title"), @Index("date"), @Index("-created") }) public class Article implements Model<ObjectId>, Serializable { @Id diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java index 6fe77f9cd36ba9c3e34a5dec70faa02b786611d1..26b42d388b96aa1359172dc3a7eb2683afc5be54 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java @@ -27,7 +27,7 @@ import de.vipra.util.an.QueryIgnore; @SuppressWarnings("serial") @JsonType("article") @Entity(value = "articles", noClassnameStored = true) -@Indexes({ @Index("title"), @Index("date") }) +@Indexes({ @Index("title"), @Index("date"), @Index("-created") }) public class ArticleFull extends FileModel<ObjectId> implements Serializable { @Id diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java index db1b8ad393ffd0665f75357546a6800de608a6b3..4449f5b26eeceabe4d2aec28c13d85b039f8148e 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java +++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleStats.java @@ -2,29 +2,17 @@ package de.vipra.util.model; import java.io.Serializable; -import org.bson.types.ObjectId; -import org.mongodb.morphia.annotations.Entity; -import org.mongodb.morphia.annotations.Id; +import org.mongodb.morphia.annotations.Embedded; import de.vipra.util.WordMap; -@Entity +@Embedded public class ArticleStats implements Serializable { private static final long serialVersionUID = -4712841724990200627L; - @Id - private ObjectId id; private long wordCount; - public ObjectId getId() { - return id; - } - - public void setId(ObjectId id) { - this.id = id; - } - public long getWordCount() { return wordCount; } @@ -42,7 +30,7 @@ public class ArticleStats implements Serializable { @Override public String toString() { - return ArticleStats.class.getSimpleName() + "[id:" + id + ", wordCount:" + wordCount + "]"; + return ArticleStats.class.getSimpleName() + "[wordCount:" + wordCount + "]"; } } \ No newline at end of file diff --git a/vipra-util/src/main/java/de/vipra/util/model/Topic.java b/vipra-util/src/main/java/de/vipra/util/model/Topic.java index ca9ac7bb9fa94b48d324d8457a68b5abb51b1b1f..894ba962562005cdb318ba3c7a2c00984cf9b736 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Topic.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Topic.java @@ -14,7 +14,7 @@ import de.vipra.util.an.JsonType; @SuppressWarnings("serial") @JsonType("topic") @Entity(value = "topics", noClassnameStored = true) -@Indexes(@Index("name")) +@Indexes({@Index("name"), @Index("-created")}) public class Topic implements Model<ObjectId>, Serializable { @Id diff --git a/vipra-util/src/main/java/de/vipra/util/model/Word.java b/vipra-util/src/main/java/de/vipra/util/model/Word.java index e32e4e9cdbde4218838618ee9422f93fc507f5b1..954eb4150da821a1fb45125dd624baff84483f8e 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/Word.java +++ b/vipra-util/src/main/java/de/vipra/util/model/Word.java @@ -1,20 +1,27 @@ package de.vipra.util.model; import java.io.Serializable; +import java.util.Date; import org.mongodb.morphia.annotations.Entity; import org.mongodb.morphia.annotations.Id; +import org.mongodb.morphia.annotations.Index; +import org.mongodb.morphia.annotations.Indexes; import org.mongodb.morphia.annotations.PostLoad; import org.mongodb.morphia.annotations.PostPersist; +import org.mongodb.morphia.annotations.PrePersist; import org.mongodb.morphia.annotations.Transient; import com.fasterxml.jackson.annotation.JsonIgnore; import de.vipra.util.an.JsonType; +import de.vipra.util.an.JsonWrap; +import de.vipra.util.an.QueryIgnore; @SuppressWarnings("serial") @JsonType("word") @Entity(value = "words", noClassnameStored = true) +@Indexes(@Index("-created")) public class Word implements Model<String>, Serializable { /** @@ -32,6 +39,10 @@ public class Word implements Model<String>, Serializable { @JsonIgnore private String word; + @QueryIgnore(multi = true) + @JsonWrap("attributes") + private Date created; + /** * The created variable is a helper that marks non-persisted new words in * the import process. Each word with created = false will be saved before @@ -39,7 +50,7 @@ public class Word implements Model<String>, Serializable { */ @Transient @JsonIgnore - private boolean created = false; + private boolean isCreated = false; public Word() {} @@ -67,18 +78,32 @@ public class Word implements Model<String>, Serializable { } public boolean isCreated() { + return isCreated; + } + + public void setIsCreated(boolean created) { + this.isCreated = created; + } + + public Date getCreated() { return created; } - public void setCreated(boolean created) { + public void setCreated(Date created) { this.created = created; } @PostLoad @PostPersist - private void post() { + private void postLoadPersist() { this.id = word; - this.created = true; + this.isCreated = true; + } + + @PrePersist + private void prePersist() { + if (this.created == null) + this.created = new Date(); } } diff --git a/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java b/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java index fd0b6c1af30b733e92a5d573d2f491ba3c4253e4..ab2b058b18ff1cd2a55f63648793471b0184685f 100644 --- a/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java +++ b/vipra-util/src/main/java/de/vipra/util/service/DatabaseService.java @@ -71,7 +71,7 @@ public class DatabaseService<T extends Model<?>, U> implements Service<T, U, Dat q.order(sortBy); if (fields != null && fields.length > 0) q.retrievedFields(true, setMinus(fields, ignoredFieldsMulti)); - else if (!defaultIgnore && ignoredFieldsMulti.length > 0) + else if (defaultIgnore && ignoredFieldsMulti.length > 0) q.retrievedFields(false, ignoredFieldsMulti); List<T> list = q.asList(); return list;