diff --git a/vipra-cmd/runcfg/CMD.launch b/vipra-cmd/runcfg/CMD.launch new file mode 100644 index 0000000000000000000000000000000000000000..d65397a0749fe379fbce8f55a6c705f5fcaf1fa5 --- /dev/null +++ b/vipra-cmd/runcfg/CMD.launch @@ -0,0 +1,18 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication"> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS"> +<listEntry value="/vipra-cmd/src/main/java/de/vipra/cmd/Main.java"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES"> +<listEntry value="1"/> +</listAttribute> +<listAttribute key="org.eclipse.debug.ui.favoriteGroups"> +<listEntry value="org.eclipse.debug.ui.launchGroup.run"/> +</listAttribute> +<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-dcC yearly -AI /home/eike/repos/master/ma-impl/vm/data/data.json"/> +<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> +<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> +<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-ea"/> +</launchConfiguration> diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java index 096f76120961e34548b7d655d053921d875617ea..145b7965aa6efde2072a59a8d7ef0eb906c86a7b 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/Main.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/Main.java @@ -93,6 +93,8 @@ public class Main { ConsoleUtils.error(cause.getMessage()); else ConsoleUtils.error(e.getMessage()); + /* After the short error message, also dump the full stack trace to stdout when the debug option is set and the silent option is not. */ if (opts.isDebug() && !opts.isSilent()) + e.printStackTrace(System.out); } } } else { diff --git 
a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWordIndex.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWordIndex.java index c939f635cdd9dfcd0e6bb2e6b64ceb1563776160..42d0f02ad5cd607900751e5547af7f951384aa59 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWordIndex.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWordIndex.java @@ -68,7 +68,7 @@ public class FilebaseWordIndex implements Iterable<String> { public void countWords(final List<ArticleWord> articleWords) { for (final ArticleWord articleWord : articleWords) - wordDocumentCount.count(articleWord.getId()); + wordDocumentCount.count(articleWord.getId().toLowerCase(java.util.Locale.ROOT)); /* Locale.ROOT keeps the count keys independent of the JVM default locale */ } public String transform(final String[] words) { @@ -86,7 +86,8 @@ public class FilebaseWordIndex implements Iterable<String> { return sb.toString(); } - public int index(final String word) { + public int index(String word) { + word = word.toLowerCase(java.util.Locale.ROOT).trim(); /* normalise the lookup key; Locale.ROOT avoids locale-specific folding such as the Turkish dotless i */ Integer index = wordIndex.get(word); if (index == null) { index = nextIndex++; diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java index 57f2b7c3525d7edcd793c843dd4efc23e0b4de3c..3564c6c2b8810a313bbaf178a429576a783a5ea1 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java @@ -18,7 +18,7 @@ public class ProcessedText { private final List<ArticleWord> articleWords; public ProcessedText(final String text, final long wordCount) { - words = text.split("\\s+"); + words = text.toLowerCase(java.util.Locale.ROOT).trim().split("\\s+"); /* lower-case with Locale.ROOT so the split words match the keys produced by FilebaseWordIndex on every machine */ originalWordCount = wordCount; reducedWordCount = words.length; reductionRatio = 1 - ((double) reducedWordCount / wordCount); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java index 30832f4464445ffaf3a2fe642e718a619a35b5cf..d2cd9b822040cd518573212666e51a0b47add551 100644 --- 
a/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java @@ -9,6 +9,7 @@ import java.util.Set; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; +import de.vipra.util.CountMap; import de.vipra.util.model.TextEntity; @JsonIgnoreProperties(ignoreUnknown = true) @@ -26,13 +27,23 @@ public class SpotlightResponse { } public List<TextEntity> getEntities() { + final CountMap<String> textEntitiesCount = new CountMap<>(resources.size()); final Set<TextEntity> textEntities = new HashSet<>(resources.size()); + // collect the distinct entities and count how often each surface form occurs for (SpotlightResource resource : resources) { textEntities.add(new TextEntity(resource.getSurfaceForm(), resource.getUri())); + textEntitiesCount.count(resource.getSurfaceForm()); // TODO add types to entities? } + + // copy each surface form's occurrence count onto the matching entity + for (TextEntity textEntity : textEntities) + textEntity.setCount(textEntitiesCount.get(textEntity.getEntity())); + + // copy the set into a list and sort it by TextEntity's natural ordering (compareTo) final List<TextEntity> textEntitiesList = new ArrayList<>(textEntities); Collections.sort(textEntitiesList); + return textEntitiesList; } diff --git a/vipra-ui/app/html/articles/show.html b/vipra-ui/app/html/articles/show.html index 573b548c9d648cccc6a38c5aa1a34126270e7e02..2af0c21facfc9bd6a658825260919b7f1059631b 100644 --- a/vipra-ui/app/html/articles/show.html +++ b/vipra-ui/app/html/articles/show.html @@ -110,6 +110,7 @@ <thead> <tr> <th ng-model="articlesShowModels.entitiesSort" sort-by="entity">Entity</th> + <th ng-model="articlesShowModels.entitiesSort" sort-by="count">Count</th> </tr> </thead> <tbody> @@ -117,6 +118,7 @@ <td> <entity-link entity="::entity" /> </td> + <td ng-bind="::entity.count"></td> </tr> </tbody> </table> @@ -135,12 +137,12 @@ <div class="panel panel-default"> <div class="panel-heading"> Found <ng-pluralize count="allWords.length||0" when="{0:'no words',1:'1 word',other:'{} unique words'}"></ng-pluralize> 
for this article.<br> - Article has <ng-pluralize count="article.stats.wordCount||0" when="{0:'no words',1:'1 word',other:'{} words'}"></ng-pluralize>, <span ng-bind-template="{{::article.stats.processedWordCount}} after cleaning ({{::Vipra.toPercent(article.stats.reductionRatio)}}% reduction)"></span>. + Article has <ng-pluralize count="article.stats.wordCount||0" when="{0:'no words',1:'1 word',other:'{} words'}"></ng-pluralize><span ng-bind-template=", {{::article.stats.processedWordCount}} after cleaning ({{::Vipra.toPercent(article.stats.reductionRatio)}}% reduction)" ng-show="article.stats.wordCount>0"></span>. <!-- the separating comma is part of the span's template so it is hidden together with the span and the sentence never ends in ", ." --> </div> <table class="table table-bordered table-condensed table-fixed"> <thead> <tr> - <th ng-model="articlesShowModels.wordsSort" sort-by="word">Word</th> + <th ng-model="articlesShowModels.wordsSort" sort-by="id">Word</th> <th ng-model="articlesShowModels.wordsSort" sort-by="count">Count</th> <th>Share</th> <th>Reduced share</th> diff --git a/vipra-ui/app/js/controllers.js b/vipra-ui/app/js/controllers.js index 1fc17d6612a0a4892a1028c846f9268d1f78f37d..d7ca1504e58034a63ef568cefa85e540eb91caf0 100644 --- a/vipra-ui/app/js/controllers.js +++ b/vipra-ui/app/js/controllers.js @@ -638,7 +638,7 @@ topicsSort: '-share', similarSort: '-share', wordsSort: '-count', - entitiesSort: 'entity' + entitiesSort: '-count' }; ArticleFactory.get({ diff --git a/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java b/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java index e66fa89f2753e395de3e6970cf0709978c912029..d0bb694519766f6b6277ea85f1aa31b90d6cf5a5 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java @@ -15,6 +15,8 @@ public class TextEntity implements Comparable<TextEntity>, Serializable { private String url; + private Integer count; + public TextEntity() {} public TextEntity(final String entity, final String url) { @@ -38,6 +40,14 @@ public class TextEntity 
implements Comparable<TextEntity>, Serializable { this.url = url; } + public Integer getCount() { + return count; + } + + public void setCount(Integer count) { + this.count = count; + } + @Override public int hashCode() { final int prime = 31; @@ -65,7 +75,7 @@ public class TextEntity implements Comparable<TextEntity>, Serializable { @Override public int compareTo(TextEntity o) { - return entity.compareTo(o.getEntity()); + return Integer.compare(count == null ? 0 : count, o.getCount() == null ? 0 : o.getCount()); /* orders ascending by count; count is a boxed Integer that stays null until setCount is called, so a missing count is compared as 0 instead of throwing NullPointerException */ } }