diff --git a/vipra-backend/src/main/java/de/vipra/rest/resource/ArticleResource.java b/vipra-backend/src/main/java/de/vipra/rest/resource/ArticleResource.java index da5cd899926ddd209470e85aaaf8069f70d19a4f..4f6707ae701e6d19f05edd83e5c031277bfcf5a7 100644 --- a/vipra-backend/src/main/java/de/vipra/rest/resource/ArticleResource.java +++ b/vipra-backend/src/main/java/de/vipra/rest/resource/ArticleResource.java @@ -43,11 +43,20 @@ public class ArticleResource { dbArticles = MongoService.getDatabaseService(config, ArticleFull.class); } + /** + * @param topicModel + * @param skip + * @param limit + * @param sortBy + * @param fields + * @param word + * @return + */ @GET @Produces(MediaType.APPLICATION_JSON) public Response getArticles(@QueryParam("topicModel") final String topicModel, @QueryParam("skip") final Integer skip, @QueryParam("limit") final Integer limit, @QueryParam("sort") @DefaultValue("date") final String sortBy, - @QueryParam("fields") final String fields, @QueryParam("word") final String word) { + @QueryParam("fields") final String fields, @QueryParam("word") final String word, @QueryParam("entity") final String entity) { final ResponseWrapper<List<ArticleFull>> res = new ResponseWrapper<>(); if (res.hasErrors()) @@ -64,10 +73,13 @@ public class ArticleResource { if (word != null && !word.isEmpty()) query.criteria("words.word", word); + if (entity != null && !entity.isEmpty()) + query.criteria("entities.entity", entity); + final List<ArticleFull> articles = dbArticles.getMultiple(query); if ((skip != null && skip > 0) || (limit != null && limit > 0)) - res.addHeader("total", dbArticles.count(null)); + res.addHeader("total", dbArticles.count(query.skip(null).limit(null))); else res.addHeader("total", articles.size()); diff --git a/vipra-backend/src/main/java/de/vipra/rest/resource/TopicResource.java b/vipra-backend/src/main/java/de/vipra/rest/resource/TopicResource.java index b4b7f02a86816283763fdd094a7148e59a20533d..136ff1143ea86a04642da44b16daee0df73570c8 100644 --- a/vipra-backend/src/main/java/de/vipra/rest/resource/TopicResource.java +++ b/vipra-backend/src/main/java/de/vipra/rest/resource/TopicResource.java @@ -70,7 +70,7 @@ public class TopicResource { final List<TopicFull> topics = dbTopics.getMultiple(query); if ((skip != null && skip > 0) || (limit != null && limit > 0)) - res.addHeader("total", dbTopics.count(null)); + res.addHeader("total", dbTopics.count(query.skip(null).limit(null))); else res.addHeader("total", topics.size()); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java index 09141da0ab88216799fc84f4a0103eed44dc8242..daa810c06f5bb38ae4f2e34ec0ca1b40644c1900 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java @@ -21,6 +21,7 @@ public class Filebase { public static final String FILE_NAME = "dtm-mult.dat"; + private final TopicModelConfig modelConfig; private final File modelDir; private final File file; private final Map<String, ArticleFull> newArticles; @@ -29,6 +30,7 @@ public class Filebase { private final FilebaseWindowIndex windowIndex; public Filebase(final TopicModelConfig modelConfig, final File dataDir) throws ParseException, IOException { + this.modelConfig = modelConfig; modelDir = new File(dataDir, modelConfig.getName()); file = new File(modelDir, FILE_NAME); newArticles = new HashMap<>(); @@ -40,7 +42,6 @@ public class Filebase { public void add(final ArticleFull article) throws FilebaseException { newArticles.put(article.getId().toString(), article); idDateIndex.add(article.getId().toString(), article.getDate()); - wordIndex.countWords(article.getWords()); } public void sync() throws IOException, ConfigException { @@ -66,7 +67,16 @@ public class Filebase { for (final FilebaseIDDateIndexEntry entry : idDateIndex) { if (entry.isNew()) { final ArticleFull newArticle = newArticles.get(entry.getId()); - outModel.write(wordIndex.transform(newArticle.getProcessedText())); + switch (modelConfig.getProcessorMode()) { + case TEXT_WITH_ENTITIES: + outModel.write(wordIndex.transform(newArticle.getProcessedText(), true)); + outModel.write(" "); + case ENTITIES: + outModel.write(wordIndex.transform(newArticle.entitiesWithTypes(), false)); + break; + case TEXT: + outModel.write(wordIndex.transform(newArticle.getProcessedText(), true)); + } outModel.write(Constants.LINE_SEP); } else { if (in == null) { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWordIndex.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWordIndex.java index 42d0f02ad5cd607900751e5547af7f951384aa59..11a199feb78dd87024d3d3ff21765454e7856ba5 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWordIndex.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWordIndex.java @@ -1,10 +1,7 @@ package de.vipra.cmd.file; -import java.io.BufferedWriter; import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; -import java.io.OutputStreamWriter; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -14,10 +11,8 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; -import de.vipra.util.Constants; import de.vipra.util.CountMap; import de.vipra.util.FileUtils; -import de.vipra.util.model.ArticleWord; public class FilebaseWordIndex implements Iterable<String> { @@ -27,7 +22,6 @@ public class FilebaseWordIndex implements Iterable<String> { private final File file; private final List<String> words; private final Map<String, Integer> wordIndex; - private final CountMap<String> wordDocumentCount; private final Set<String> newWords; private int nextIndex = 0; @@ -38,44 +32,30 @@ public class FilebaseWordIndex implements Iterable<String> { final List<String> lines = FileUtils.readFile(file); words = new ArrayList<>(lines.size()); wordIndex = new HashMap<>(lines.size()); - wordDocumentCount = new CountMap<>(lines.size()); for (final String line : lines) { final String[] parts = line.split(","); words.add(parts[0]); wordIndex.put(parts[0], nextIndex++); - wordDocumentCount.count(parts[0], Integer.parseInt(parts[1])); } } else { words = new ArrayList<>(); wordIndex = new HashMap<>(); - wordDocumentCount = new CountMap<>(); } } public void sync() throws IOException { if (!dirty) return; - final BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, false))); - for (final String word : words) { - out.write(word); - out.write(","); - out.write(Integer.toString(wordDocumentCount.get(word))); - out.write(Constants.LINE_SEP); - } - out.close(); + org.apache.commons.io.FileUtils.writeLines(file, words); dirty = false; } - public void countWords(final List<ArticleWord> articleWords) { - for (final ArticleWord articleWord : articleWords) - wordDocumentCount.count(articleWord.getId().toLowerCase()); - } - - public String transform(final String[] words) { + public String transform(final String[] words, final boolean dbInsert) { final CountMap<String> countMap = new CountMap<>(); for (final String word : words) { countMap.count(word); - newWords.add(word); + if (dbInsert) + newWords.add(word); } final StringBuilder sb = new StringBuilder(); @@ -98,14 +78,6 @@ public class FilebaseWordIndex implements Iterable<String> { return index; } - public int getWordDocumentCount(final String word) { - return wordDocumentCount.get(word); - } - - public int getWordDocumentCount(final int wordIndex) { - return getWordDocumentCount(words.get(wordIndex)); - } - public String word(final int index) { return words.get(index); } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index 7cda33e4e848a9ba6a410428a81259459e743682..9eefddddb29c26e607070df11009d1b933e3c8cd 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -7,6 +7,8 @@ import java.io.FilenameFilter; import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.bson.types.ObjectId; import org.json.simple.JSONArray; @@ -21,7 +23,6 @@ import de.vipra.cmd.text.ProcessedText; import de.vipra.cmd.text.Processor; import de.vipra.cmd.text.ProcessorException; import de.vipra.cmd.text.SpotlightAnalyzer; -import de.vipra.cmd.text.SpotlightResource; import de.vipra.cmd.text.SpotlightResponse; import de.vipra.util.Config; import de.vipra.util.ConsoleUtils; @@ -142,43 +143,41 @@ public class ImportCommand implements Command { * @return * @throws Exception */ - private void importArticle(final JSONObject object) { + private void importArticle(final JSONObject object, final int current, final int max) { final ArticleFull article = articleFromJSON(object); - try { + final int maxPad = Integer.toString(max).length(); + String currentStr = "(" + StringUtils.pad(Integer.toString(current), maxPad, " ", true) + "/" + max + ")"; - String text = article.getText(); + if (max > 1) { + if (current < max) + currentStr = " ├ " + currentStr; + else + currentStr = " └ " + currentStr; + } else + currentStr = " └ " + currentStr; + try { + // preprocess text + final ProcessedText processedText = processor.process(modelConfig, article.getText()); + + // spotlight analysis if (spotlightAnalyzer != null) { - // extract entities final SpotlightResponse spotlightResponse = spotlightAnalyzer.analyze(article.getText()); final List<TextEntity> textEntities = spotlightResponse.getEntities(); article.setEntities(textEntities); - // replace/append text with entities in mixed/entities mode - if (modelConfig.getProcessorMode() == ProcessorMode.ENTITIES || modelConfig.getProcessorMode() == ProcessorMode.TEXT_WITH_ENTITIES) { - final StringBuilder sb = new StringBuilder(); - for (final SpotlightResource sr : spotlightResponse.getResources()) { - sb.append(" ").append(sr.getSurfaceForm()); - - for (final String type : sr.getTypes()) { - final String[] parts = type.split(":"); - sb.append(" ").append(parts[parts.length - 1]); - } - } - - if (modelConfig.getProcessorMode() == ProcessorMode.ENTITIES) - text = sb.toString().trim(); - else - text += " " + sb.toString(); - } + // insert entities into text + String articleText = article.getText(); + for (final TextEntity textEntity : textEntities) + articleText = articleText.replaceAll("(?i)\\b" + Pattern.quote(textEntity.getEntity()) + "\\b(?![^<]*>|[^<>]*</)", + Matcher.quoteReplacement(textEntity.aTag())); + article.setText(articleText); } - // preprocess text - final ProcessedText processedText = processor.process(modelConfig, text); - - if (processedText.getReducedWordCount() < modelConfig.getDocumentMinimumLength()) { - ConsoleUtils.info(" skipped \"" + object.get("title")); + if (modelConfig.getProcessorMode() != ProcessorMode.ENTITIES + && processedText.getReducedWordCount() < modelConfig.getDocumentMinimumLength()) { + ConsoleUtils.info(currentStr + " skipped \"" + object.get("title")); } else { article.setProcessedText(processedText.getWords()); article.setWords(processedText.getArticleWords()); @@ -195,7 +194,7 @@ public class ImportCommand implements Command { buffer.add(article); filebase.add(article); - ConsoleUtils.info("imported \"" + object.get("title")); + ConsoleUtils.info(currentStr + " imported \"" + object.get("title") + "\""); } } catch (final ProcessorException e) { ConsoleUtils.error("could not preprocess text of article '" + article.getTitle() + "'"); @@ -203,7 +202,7 @@ public class ImportCommand implements Command { ConsoleUtils.error("could not save processed article in the database '" + article.getTitle() + "'"); } catch (final FilebaseException e) { ConsoleUtils.error("could not save processed article in the filebase '" + article.getTitle() + "'"); - } catch (IOException e) { + } catch (final IOException e) { ConsoleUtils.error("io error"); } } @@ -221,13 +220,16 @@ public class ImportCommand implements Command { final Object data = parser.parse(new FileReader(file)); int imported = 0; + ConsoleUtils.info("file \"" + file.getAbsolutePath() + "\""); + if (data instanceof JSONArray) { final JSONArray objects = (JSONArray) data; - imported += objects.size(); - for (final Object object : objects) - importArticle((JSONObject) object); + final int size = objects.size(); + imported += size; + for (int i = 0; i < objects.size(); i++) + importArticle((JSONObject) objects.get(i), i + 1, size); } else if (data instanceof JSONObject) { - importArticle((JSONObject) data); + importArticle((JSONObject) data, 1, 1); imported++; } else { ConsoleUtils.error("unknown data format"); @@ -251,6 +253,10 @@ public class ImportCommand implements Command { if (config.getSpotlightUrl() != null) spotlightAnalyzer = new SpotlightAnalyzer(modelConfig); + if ((modelConfig.getProcessorMode() == ProcessorMode.ENTITIES || modelConfig.getProcessorMode() == ProcessorMode.TEXT_WITH_ENTITIES) + && spotlightAnalyzer == null) + throw new ConfigException("spotlight url is empty, but processor mode is set to " + modelConfig.getProcessorMode()); + buffer = new ArticleBuffer(dbArticles); filebase = new Filebase(modelConfig, config.getDataDirectory()); topicModel = new TopicModelFull(modelConfig.getName(), modelConfig); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java index d2cd9b822040cd518573212666e51a0b47add551..084ae123cc5ff04604aac46259849a414e0e18b6 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java @@ -29,21 +29,37 @@ public class SpotlightResponse { public List<TextEntity> getEntities() { final CountMap<String> textEntitiesCount = new CountMap<>(resources.size()); final Set<TextEntity> textEntities = new HashSet<>(resources.size()); + // get entities and count - for (SpotlightResource resource : resources) { - textEntities.add(new TextEntity(resource.getSurfaceForm(), resource.getUri())); + for (final SpotlightResource resource : resources) { + final Set<String> types = new HashSet<>(resource.getTypes().size()); + for (final String type : resource.getTypes()) { + if (!type.isEmpty()) { + final String[] parts; + if (type.contains("/")) + parts = type.split("/"); + else + parts = type.split(":"); + if (parts.length > 0) + types.add(parts[parts.length - 1]); + } + } + + final TextEntity textEntity = new TextEntity(resource.getSurfaceForm(), resource.getUri()); + textEntity.setTypes(new ArrayList<>(types)); + + textEntities.add(textEntity); textEntitiesCount.count(resource.getSurfaceForm()); - // TODO add types to entities? } - + // insert count - for (TextEntity textEntity : textEntities) + for (final TextEntity textEntity : textEntities) textEntity.setCount(textEntitiesCount.get(textEntity.getEntity())); - + // to list and sort final List<TextEntity> textEntitiesList = new ArrayList<>(textEntities); Collections.sort(textEntitiesList); - + return textEntitiesList; } diff --git a/vipra-ui/app/html/directives/entity-menu.html b/vipra-ui/app/html/directives/entity-menu.html index fc02195d0b32c838235b74ae70c101bcffe09e02..08f1ca79c41cfaa0dfaf6f4fafa2903c44a21861 100644 --- a/vipra-ui/app/html/directives/entity-menu.html +++ b/vipra-ui/app/html/directives/entity-menu.html @@ -3,9 +3,8 @@ <i class="fa fa-caret-down"></i> </a> <ul class="dropdown-menu" ng-class="{'dropdown-menu-right':dropdownRight}"> - <li><a ui-sref="entities.show.topics({id:entity.entity})">Topics</a></li> <li><a ui-sref="entities.show.articles({id:entity.entity})">Articles</a></li> <li role="separator" class="divider"></li> - <li><a ng-href="{{entity.url}}" target="_blank">DBPedia</a></li> + <li><a ng-href="{{entity.url}}" target="_blank"><span class="dbpedia-logo"></span> DBPedia</a></li> </ul> </div> \ No newline at end of file diff --git a/vipra-ui/app/html/entities/articles.html b/vipra-ui/app/html/entities/articles.html index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..be1fda441e080bb3dc8ed159387a2181f3db1606 100644 --- a/vipra-ui/app/html/entities/articles.html +++ b/vipra-ui/app/html/entities/articles.html @@ -0,0 +1,58 @@ +<div class="container" ng-cloak ng-hide="!rootModels.topicModel || state.name !== 'entities.show.articles'"> + <div class="row"> + <div class="col-md-12"> + <div class="page-header"> + <h1 ng-bind-template="Articles for entity '{{::entity}}'"></h1> + <table class="item-actions"> + <tr> + <td> + <a class="btn btn-default" ng-click="goBack()" ng-show="oldState.name && oldState.name !== state.name">Back</a> + </td> + </tr> + </table> + </div> + </div> + </div> + <div class="row"> + <div class="col-md-12 text-center"> + <pagination total="articlesTotal" page="entitiesArticlesModels.page" limit="entitiesArticlesModels.limit" /> + </div> + </div> + <div class="row"> + <div class="col-md-12"> + <div class="panel panel-default"> + <div class="panel-heading"> + Found + <ng-pluralize count="articlesTotal||0" when="{0:'no articles',1:'1 article',other:'{} articles'}"></ng-pluralize> in the database. + <span ng-show="articlesTotal"> + Sort by + <ol class="nya-bs-select nya-bs-condensed" ng-model="entitiesArticlesModels.sortkey"> + <li value="title" class="nya-bs-option"><a>Title</a></li> + <li value="date" class="nya-bs-option"><a>Date</a></li> + <li value="created" class="nya-bs-option"><a>Added</a></li> + </ol> + <sort-dir ng-model="entitiesArticlesModels.sortdir" /> + </span> + </div> + <table class="table table-hover table-condensed"> + <tbody> + <tr ng-repeat="article in articles"> + <td> + <a ui-sref="articles.show({id: article.id})" ng-bind="::article.title"></a> + </td> + </tr> + </tbody> + </table> + <div class="panel-footer"> + Page <span ng-bind="entitiesArticlesModels.page||1"></span> of <span ng-bind="maxPage||1"></span> + </div> + </div> + </div> + </div> + <div class="row"> + <div class="col-md-12 text-center"> + <pagination total="articlesTotal" page="entitiesArticlesModels.page" limit="entitiesArticlesModels.limit" /> + </div> + </div> +</div> +<div ng-cloak ui-view></div> \ No newline at end of file diff --git a/vipra-ui/app/html/entities/index.html b/vipra-ui/app/html/entities/index.html index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0b019785b96a496cf641cd08a7f73efd6867ac2c 100644 --- a/vipra-ui/app/html/entities/index.html +++ b/vipra-ui/app/html/entities/index.html @@ -0,0 +1 @@ +<div ng-cloak ui-view></div> \ No newline at end of file diff --git a/vipra-ui/app/html/entities/show.html b/vipra-ui/app/html/entities/show.html index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0b019785b96a496cf641cd08a7f73efd6867ac2c 100644 --- a/vipra-ui/app/html/entities/show.html +++ b/vipra-ui/app/html/entities/show.html @@ -0,0 +1 @@ +<div ng-cloak ui-view></div> \ No newline at end of file diff --git a/vipra-ui/app/html/entities/topics.html b/vipra-ui/app/html/entities/topics.html deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/vipra-ui/app/html/words/show.html b/vipra-ui/app/html/words/show.html index 515f6e9c2fecc07d0d800cdd89681a47a92ce6e0..0b019785b96a496cf641cd08a7f73efd6867ac2c 100644 --- a/vipra-ui/app/html/words/show.html +++ b/vipra-ui/app/html/words/show.html @@ -1,4 +1 @@ -<div class="container" ng-cloak ng-hide="!rootModels.topicModel || state.name !== 'words.show'"> - <h1 ng-bind="word.id"></h1> -</div> -<div ng-cloak ui-view></div> +<div ng-cloak ui-view></div> \ No newline at end of file diff --git a/vipra-ui/app/img/dbpedia-logo.svg b/vipra-ui/app/img/dbpedia-logo.svg new file mode 100644 index 0000000000000000000000000000000000000000..8d171f09becf65b836a7c46855dbb4483a1ad7b4 --- /dev/null +++ b/vipra-ui/app/img/dbpedia-logo.svg @@ -0,0 +1 @@ +<svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="svg3718" viewBox="0 0 130.413 96.786428"><defs id="defs3720"><radialGradient gradientUnits="userSpaceOnUse" r="55.666401" cy="59.631302" cx="131.353" id="XMLID_18_"><stop id="stop130" offset="0" stop-color="#FFD528"/><stop id="stop132" offset="0.222" stop-color="#FFD227"/><stop id="stop134" offset="0.4435" stop-color="#FFC925"/><stop id="stop136" offset="0.6647" stop-color="#FDBB21"/><stop id="stop138" offset="0.8846" stop-color="#FAA61D"/><stop id="stop140" offset="1" stop-color="#F8991D"/></radialGradient><radialGradient gradientUnits="userSpaceOnUse" r="63.037201" cy="61.586399" cx="130.28909" id="XMLID_17_"><stop id="stop101" offset="0" stop-color="#6D8896"/><stop id="stop103" offset="0.0435" stop-color="#4B7487"/><stop id="stop105" offset="0.0933" stop-color="#286379"/><stop id="stop107" offset="0.1494" stop-color="#00556C"/><stop id="stop109" offset="0.2114" stop-color="#004962"/><stop id="stop111" offset="0.2815" stop-color="#004059"/><stop id="stop113" offset="0.3633" stop-color="#003951"/><stop id="stop115" offset="0.4644" stop-color="#00344C"/><stop id="stop117" offset="0.6062" stop-color="#003149"/><stop id="stop119" offset="1" stop-color="#003048"/></radialGradient><radialGradient xlink:href="#XMLID_18_" id="radialGradient3700" gradientUnits="userSpaceOnUse" cx="131.353" cy="59.631302" r="55.666401"/><radialGradient xlink:href="#XMLID_18_" id="radialGradient3702" gradientUnits="userSpaceOnUse" cx="131.353" cy="59.631302" r="55.666401"/><radialGradient xlink:href="#XMLID_18_" id="radialGradient3704" gradientUnits="userSpaceOnUse" cx="131.353" cy="59.631302" r="55.666401"/><radialGradient xlink:href="#XMLID_18_" id="radialGradient3706" gradientUnits="userSpaceOnUse" cx="131.353" cy="59.631302" r="55.666401"/><radialGradient xlink:href="#XMLID_18_" id="radialGradient3708" gradientUnits="userSpaceOnUse" cx="131.353" cy="59.631302" r="55.666401"/><radialGradient xlink:href="#XMLID_18_" id="radialGradient3710" gradientUnits="userSpaceOnUse" cx="131.353" cy="59.631302" r="55.666401"/><radialGradient xlink:href="#XMLID_18_" id="radialGradient3712" gradientUnits="userSpaceOnUse" cx="131.353" cy="59.631302" r="55.666401"/><radialGradient xlink:href="#XMLID_18_" id="radialGradient3714" gradientUnits="userSpaceOnUse" cx="131.353" cy="59.631302" r="55.666401"/><radialGradient xlink:href="#XMLID_18_" id="radialGradient3716" gradientUnits="userSpaceOnUse" cx="131.353" cy="59.631302" r="55.666401"/></defs><metadata id="metadata3723"/><g transform="translate(-2.661465,-3.636792)" id="layer1"><g id="g94" transform="translate(-63.989535,3.637222)"><path id="path96" d="m184.007 65.5c-1.928 0.029-4.055-0.01-6.305-0.049C163.736 65.202 143.662 64.867 131.859 80.32 120.055 64.867 99.982 65.203 86.014 65.451c-2.25 0.039-4.376 0.078-6.303 0.049-0.462-0.005-4.855 0.136-13.06 0.418 0 0 0.556 10.304 7.805 10.304 2.603 0 4.485-0.061 5.18-0.066 2.023 0.027 4.23-0.012 6.567-0.052 19.06-0.34 34.179 0.724 40.959 17.364l1.352 3.318 3.104 0 0.476 0 3.106 0 1.353-3.318c6.779-16.64 21.897-17.705 40.958-17.364 2.338 0.041 4.545 0.08 6.568 0.052 0.693 0.005 2.577 0.066 5.179 0.066 7.251 0 7.806-10.304 7.806-10.304-8.203-0.282-12.597-0.422-13.057-0.418z" fill="#004563"/></g><g id="g98" transform="translate(-63.989535,3.637222)"><radialGradient gradientUnits="userSpaceOnUse" r="63.037201" cy="61.586399" cx="130.28909" id="radialGradient3651"><stop id="stop3653" offset="0" stop-color="#6D8896"/><stop id="stop3655" offset="0.0435" stop-color="#4B7487"/><stop id="stop3657" offset="0.0933" stop-color="#286379"/><stop id="stop3659" offset="0.1494" stop-color="#00556C"/><stop id="stop3661" offset="0.2114" stop-color="#004962"/><stop id="stop3663" offset="0.2815" stop-color="#004059"/><stop id="stop3665" offset="0.3633" stop-color="#003951"/><stop id="stop3667" offset="0.4644" stop-color="#00344C"/><stop id="stop3669" offset="0.6062" stop-color="#003149"/><stop id="stop3671" offset="1" stop-color="#003048"/></radialGradient><path id="path121" d="m137.327 1.135 0 0c-2.923 1.367-5.142 3.79-6.243 6.822-0.487 1.347-0.73 2.741-0.73 4.132 0 1.746 0.382 3.487 1.146 5.117 1.781 3.813 5.409 6.26 9.504 6.761 0.106 1.142 0.227 2.284 0.227 3.441 0 0.553-0.041 1.108-0.064 1.663-0.494 0.127-1.016 0.134-1.482 0.352-2.007 0.936-3.525 2.597-4.28 4.677-0.335 0.922-0.501 1.878-0.501 2.831 0 1.198 0.264 2.391 0.785 3.508 0.48 1.023 1.215 1.864 2.054 2.605-1.188 2.629-2.685 5.213-4.495 7.717-0.17-0.039-0.337-0.076-0.51-0.104-0.546-3.015-1.326-5.929-2.355-8.693 0.611-1.083 1.019-2.25 1.019-3.462 0-1.022-0.22-2.049-0.671-3.015-1.11-2.374-3.376-3.848-5.931-4.045-2.954-4.181-6.559-7.778-10.724-10.754 0.114-0.599 0.311-1.194 0.311-1.79 0-3.917-2.425-7.612-6.304-9.027-4.965-1.802-10.471 0.773-12.274 5.736-0.873 2.406-0.758 5.007 0.325 7.325 1.083 2.318 3.005 4.074 5.411 4.947 2.871 1.041 5.989 0.458 8.492-1.206 3.171 2.25 5.937 4.973 8.266 8.116-0.728 1.152-1.239 2.411-1.239 3.753 0 1.019 0.221 2.047 0.672 3.017 1.162 2.48 3.579 3.991 6.215 4.089 0.704 2.128 1.263 4.369 1.655 6.709-0.221 0.171-0.45 0.333-0.653 0.524-3.313-2.269-6.775-4.13-10.358-5.501-0.126-0.428-0.254-0.866-0.254-0.866-0.814-1.739-2.254-3.056-4.058-3.709-1.8-0.651-3.746-0.562-5.479 0.25-0.769 0.357-1.421 0.894-2.017 1.508-3.976-0.389-7.964-0.2-11.908 0.452-2.589-3.858-7.619-5.437-11.945-3.411-4.791 2.242-6.865 7.961-4.625 12.749 2.243 4.789 7.962 6.861 12.749 4.625 2.829-1.323 4.69-3.887 5.262-6.873 2.897-0.482 5.836-0.668 8.795-0.445 0.093 0.301 0.097 0.634 0.23 0.92 1.675 3.583 5.949 5.137 9.529 3.465 0.778-0.366 1.44-0.89 2.031-1.491 3.123 1.308 6.182 3.037 9.123 5.205 0.017 1.311 0.299 2.604 0.859 3.788 1.04 2.234 2.896 3.934 5.222 4.778 2.328 0.846 4.844 0.734 7.086-0.314 2.479-1.159 4.206-3.33 4.926-5.916 5.223-0.753 10.094-2.186 14.519-4.296 2.022 1.218 4.49 1.486 6.673 0.465 2.572-1.205 4.115-3.771 4.115-6.487 0-0.326-0.113-0.656-0.159-0.986 2.217-1.971 4.24-4.171 6.056-6.563 2.378 0.585 4.858 0.482 7.111-0.574 2.606-1.22 4.579-3.381 5.557-6.081 2.025-5.579-0.866-11.765-6.449-13.798-5.58-2.019-11.771 0.872-13.802 6.45-0.432 1.193-0.642 2.428-0.642 3.655 0 2.296 0.765 4.537 2.149 6.408-1.337 1.731-2.826 3.35-4.466 4.834-1.793-0.684-3.767-0.68-5.541 0.152-2.42 1.128-3.926 3.518-4.061 6.118-3.604 1.65-7.58 2.787-11.853 3.415 2.273-3.198 4.152-6.516 5.563-9.915 0.61-0.128 1.243-0.186 1.809-0.45 2.007-0.937 3.527-2.601 4.282-4.683 0.335-0.922 0.501-1.876 0.501-2.829 0-1.198-0.263-2.389-0.785-3.506-0.542-1.159-1.403-2.093-2.397-2.885 0.07-1.032 0.141-2.066 0.141-3.104 0-1.579-0.154-3.155-0.316-4.729 2.639-1.406 4.666-3.668 5.691-6.499 0.489-1.345 0.731-2.738 0.731-4.126 0-1.747-0.383-3.486-1.145-5.113-2.824-6.036-10.035-8.654-16.076-5.833z" fill="url(#XMLID_17_)"/></g><g id="g123" transform="translate(-63.989535,3.637222)"><g id="g125"><g id="g127"><radialGradient gradientUnits="userSpaceOnUse" r="55.666401" cy="59.631302" cx="131.353" id="radialGradient3677"><stop id="stop3679" offset="0" stop-color="#FFD528"/><stop id="stop3681" offset="0.222" stop-color="#FFD227"/><stop id="stop3683" offset="0.4435" stop-color="#FFC925"/><stop id="stop3685" offset="0.6647" stop-color="#FDBB21"/><stop id="stop3687" offset="0.8846" stop-color="#FAA61D"/><stop id="stop3689" offset="1" stop-color="#F8991D"/></radialGradient><path id="path142" d="m128.544 38.833c0.172-2.384-1.622-4.455-4.006-4.626-2.385-0.171-4.454 1.623-4.625 4.007-0.172 2.384 1.623 4.455 4.011 4.624 2.379 0.173 4.45-1.621 4.62-4.005z" fill="url(#radialGradient3700)"/><circle id="circle144" r="6.6900001" cy="59.631001" cx="131.353" fill="url(#radialGradient3702)"/><circle id="circle146" r="6.493" cy="50.263" cx="82.981003" fill="url(#radialGradient3704)"/><path id="path148" d="m111.45 19.487c0.257-3.577-2.437-6.684-6.014-6.941-3.575-0.253-6.682 2.435-6.937 6.012-0.257 3.577 2.433 6.685 6.014 6.941 3.574 0.256 6.68-2.436 6.937-6.012z" fill="url(#radialGradient3706)"/><path id="path150" d="m108.147 45.254c-2.372-0.171-4.433 1.612-4.605 3.986-0.168 2.372 1.614 4.433 3.989 4.604 2.372 0.169 4.434-1.616 4.605-3.988 0.167-2.372-1.616-4.434-3.989-4.602z" fill="url(#radialGradient3708)"/><path id="path152" d="m174.369 26.85c-3.894-0.279-7.277 2.651-7.555 6.546-0.279 3.895 2.651 7.277 6.544 7.557 3.897 0.281 7.277-2.651 7.559-6.546 0.277-3.893-2.653-7.278-6.548-7.557z" fill="url(#radialGradient3710)"/><circle id="circle154" r="4.3270001" cy="51.766998" cx="158.276" fill="url(#radialGradient3712)"/><circle id="circle156" r="5.2259998" cy="36.931999" cx="143.19099" fill="url(#radialGradient3714)"/><path id="path158" d="m143.05 3.719c-4.624-0.333-8.638 3.146-8.971 7.768-0.328 4.625 3.147 8.64 7.77 8.971 4.626 0.33 8.638-3.149 8.97-7.771 0.328-4.623-3.146-8.638-7.769-8.968z" fill="url(#radialGradient3716)"/></g></g></g></g></svg> \ No newline at end of file diff --git a/vipra-ui/app/js/app.js b/vipra-ui/app/js/app.js index 80e1a6745c5e64b6911d0d2434878b94f58b60fa..59d89ad57e97b4fdbd1ee125399a17d452a46386 100644 --- a/vipra-ui/app/js/app.js +++ b/vipra-ui/app/js/app.js @@ -119,26 +119,20 @@ $stateProvider.state('entities', { url: '/entities?p', templateUrl: 'html/entities/index.html', - controller: 'WordsIndexController' + controller: 'EntitiesIndexController' }); $stateProvider.state('entities.show', { url: '/:id', templateUrl: 'html/entities/show.html', - controller: 'WordsShowController' - }); - - $stateProvider.state('entities.show.topics', { - url: '/topics', - templateUrl: 'html/entities/topics.html', - controller: 'WordsTopicsController' + controller: 'EntitiesShowController' }); $stateProvider.state('entities.show.articles', { url: '/articles', templateUrl: 'html/entities/articles.html', - controller: 'WordsArticlesController' - }); + controller: 'EntitiesArticlesController' + }) // states: errors diff --git a/vipra-ui/app/js/controllers.js b/vipra-ui/app/js/controllers.js index d7ca1504e58034a63ef568cefa85e540eb91caf0..4b7ee2bb3371818ab9219dc20070c5d66965e8f5 100644 --- a/vipra-ui/app/js/controllers.js +++ b/vipra-ui/app/js/controllers.js @@ -645,7 +645,7 @@ id: $stateParams.id }, function(data) { $scope.article = data; - $scope.article.text = Vipra.createInitial($scope.article.text); + $scope.article.text = $scope.article.text; $scope.articleDate = Vipra.formatDate($scope.article.date); $scope.articleCreated = Vipra.formatDateTime($scope.article.created); $scope.articleModified = Vipra.formatDateTime($scope.article.modified); @@ -989,6 +989,57 @@ } ]); + /**************************************************************************** + * Entity Controllers + ****************************************************************************/ + + app.controller('EntitiesIndexController', ['$scope', + function($scope) { + + } + ]); + + app.controller('EntitiesShowController', ['$scope', + function($scope) { + + } + ]); + + app.controller('EntitiesArticlesController', ['$scope', '$state', '$stateParams', 'ArticleFactory', + function($scope, $state, $stateParams, ArticleFactory) { + + $scope.entity = $stateParams.id; + + // page was reloaded, choose topic model + if (!$scope.rootModels.topicModel && $state.current.name === 'entities.articles') + $scope.chooseTopicModel(); + + $scope.entitiesArticlesModels = { + sortkey: 'date', + sortdir: true, + page: 1, + limit: 100 + }; + + $scope.$watchGroup(['entitiesArticlesModels.page', 'entitiesArticlesModels.sortkey', 'entitiesArticlesModels.sortdir', 'rootModels.topicModel'], function() { + if (!$scope.rootModels.topicModel) return; + + ArticleFactory.query({ + skip: ($scope.entitiesArticlesModels.page - 1) * $scope.entitiesArticlesModels.limit, + limit: $scope.entitiesArticlesModels.limit, + sort: ($scope.entitiesArticlesModels.sortdir ? '' : '-') + $scope.entitiesArticlesModels.sortkey, + topicModel: $scope.rootModels.topicModel.id, + entity: $scope.entity + }, function(data, headers) { + $scope.articles = data; + $scope.articlesTotal = headers("V-Total"); + $scope.maxPage = Math.ceil($scope.articlesTotal / $scope.entitiesArticlesModels.limit); + }); + }); + + } + ]); + /**************************************************************************** * Word Controllers ****************************************************************************/ @@ -1055,7 +1106,7 @@ skip: ($scope.wordsTopicsModels.page - 1) * $scope.wordsTopicsModels.limit, limit: $scope.wordsTopicsModels.limit, sort: ($scope.wordsTopicsModels.sortdir ? '' : '-') + $scope.wordsTopicsModels.sortkey, - word: $stateParams.word + word: $scope.word }, function(data, headers) { $scope.topics = data; $scope.topicsTotal = headers("V-Total"); diff --git a/vipra-ui/app/less/app.less b/vipra-ui/app/less/app.less index 3f482be594bd978ee1b803219a57f0a0c22c8655..ef6918e769fde26096e1168f1b14a9bc040aa5ce 100644 --- a/vipra-ui/app/less/app.less +++ b/vipra-ui/app/less/app.less @@ -433,6 +433,14 @@ entity-menu { margin-top: 35px; } +.dbpedia-logo { + background-image: url(../img/dbpedia-logo.svg); + display: inline-block; + height: 15px; + width: 21px; + background-repeat: no-repeat; +} + @-moz-keyframes spin { 100% { -moz-transform: rotateY(360deg); diff --git a/vipra-util/src/main/java/de/vipra/util/ArrayUtils.java b/vipra-util/src/main/java/de/vipra/util/ArrayUtils.java index e977f3f74c7076ee1639b304de8280a0a30e6498..030390f6a712e3f49681a861302afcbf030e14f9 100644 --- a/vipra-util/src/main/java/de/vipra/util/ArrayUtils.java +++ b/vipra-util/src/main/java/de/vipra/util/ArrayUtils.java @@ -1,9 +1,17 @@ package de.vipra.util; +import java.lang.reflect.Array; import java.util.Arrays; public class ArrayUtils { + /** + * Finds the maximum column values in a matrix of double values + * + * @param values + * the double matrix to scan + * @return an array of maximum values per column + */ public static double[] findColMaximum(final double[][] values) { final int rows = values.length; final int cols = values[0].length; @@ -55,4 +63,25 @@ public class ArrayUtils { return result / Math.log(2); } + /** + * Concatenate two arrays + * + * @param a + * left array + * @param b + * right array + * @return left array + right array concatenated + */ + public static <T> T[] addAll(final T[] a, final T[] b) { + final int aLen = a.length; + final int bLen = b.length; + + @SuppressWarnings("unchecked") + final T[] c = (T[]) Array.newInstance(a.getClass().getComponentType(), aLen + bLen); + System.arraycopy(a, 0, c, 0, aLen); + System.arraycopy(b, 0, c, aLen, bLen); + + return c; + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/CalendarUtils.java b/vipra-util/src/main/java/de/vipra/util/CalendarUtils.java index 3539e91a714bdfe6fae6940e75c5dff1b3ea5e38..6cdd45929e1c780bc639c4154913f4d942705398 100644 --- a/vipra-util/src/main/java/de/vipra/util/CalendarUtils.java +++ b/vipra-util/src/main/java/de/vipra/util/CalendarUtils.java @@ -19,6 +19,14 @@ public class CalendarUtils { return (int) Math.ceil(c.get(Calendar.MONTH) / 3.0); } + /** + * Returns the start month of the quarter of a calendar date. Ranges from + * 0-3, because Java is weird. + * + * @param c + * the calendar of which to return the start month of its quarter + * @return the start month of the quarter of the calendar + */ public static final int getQuarterStart(final Calendar c) { switch (c.get(Calendar.MONTH)) { case 0: @@ -41,6 +49,14 @@ public class CalendarUtils { return 0; } + /** + * Returns the end month of the quarter of a calendar date. Ranges from 0-3, + * because Java is weird. + * + * @param c + * the calendar of which to return the end month of its quarter + * @return the end month of the quarter of the calendar + */ public static final int getQuarterEnd(final Calendar c) { switch (c.get(Calendar.MONTH)) { case 0: diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index 046f9cb9e45e9f0d3d2356dc891db5f543615f7f..a4084fd70eac1b1a030e7543bac2906c7d68e198 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -135,9 +135,9 @@ public class Constants { /** * Minimum number of dbpedia inlinks for an entity annotation to be - * accepted. Default 0. + * accepted. Default 20. */ - public static final int SPOTLIGHT_SUPPORT = 0; + public static final int SPOTLIGHT_SUPPORT = 20; /** * Disambiguation confidence. Eliminates top n percent of inconfident diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java index 2053f17db14191c98989a28062176152141fa7fd..be3c96a6f3346141993f4e659740368a0ac015e2 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java @@ -218,7 +218,7 @@ public class ArticleFull implements Model<ObjectId>, Serializable { return entities; } - public void setEntities(List<TextEntity> entities) { + public void setEntities(final List<TextEntity> entities) { this.entities = entities; } @@ -260,6 +260,19 @@ public class ArticleFull implements Model<ObjectId>, Serializable { meta.put(key, value); } + public String[] entitiesWithTypes() { + int size = 0; + for (final TextEntity textEntity : entities) { + size++; + if (textEntity.getTypes() != null) + size += textEntity.getTypes().size(); + } + final List<String> entitiesWithTypes = new ArrayList<>(size); + for (final TextEntity textEntity : entities) + entitiesWithTypes.addAll(textEntity.entityWithTypes()); + return entitiesWithTypes.toArray(new String[size]); + } + @PrePersist public void prePersist() { modified = new Date(); diff --git a/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java b/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java index d0bb694519766f6b6277ea85f1aa31b90d6cf5a5..275e349b86dba56a9009cbf6137b5d6f5c50ca0e 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java @@ -1,6 +1,8 @@ package de.vipra.util.model; import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; import org.mongodb.morphia.annotations.Embedded; @@ -17,6 +19,8 @@ public class TextEntity implements Comparable<TextEntity>, Serializable { private Integer count; + private List<String> types; + public TextEntity() {} public TextEntity(final String entity, final String url) { @@ -44,10 +48,30 @@ public class TextEntity implements Comparable<TextEntity>, Serializable { return count; } - public void setCount(Integer count) { + public void setCount(final Integer count) { this.count = count; } + public List<String> getTypes() { + return types; + } + + public void setTypes(final List<String> types) { + this.types = types; + } + + public String aTag() { + return "<a href=\"" + url + "\">" + entity + "</a>"; + } + + public List<String> entityWithTypes() { + final List<String> entityWithTypes = new ArrayList<>(types.size() + 1); + entityWithTypes.add(entity.toLowerCase()); + for (final String type : types) + entityWithTypes.add(type.toLowerCase()); + return entityWithTypes; + } + @Override public int hashCode() { final int prime = 31; @@ -57,14 +81,14 @@ public class TextEntity implements Comparable<TextEntity>, Serializable { } @Override - public boolean equals(Object obj) { + public boolean equals(final Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; - TextEntity other = (TextEntity) obj; + final TextEntity other = (TextEntity) obj; if (entity == null) { if (other.entity != null) return false; @@ -74,8 +98,13 @@ public class TextEntity implements Comparable<TextEntity>, Serializable { } @Override - public int compareTo(TextEntity o) { + public int compareTo(final TextEntity o) { return count.compareTo(o.getCount()); } + @Override + public String toString() { + return "TextEntity [entity=" + entity + ", url=" + url + ", count=" + count + ", types=" + types + "]"; + } + }