From 931b4dcd46d26a56054559274c041dff99b63748 Mon Sep 17 00:00:00 2001 From: Eike Cochu <eike@cochu.com> Date: Sun, 3 Apr 2016 22:24:56 +0200 Subject: [PATCH] updated importing text entities --- .../vipra/rest/resource/SearchResource.java | 22 +++++++-- .../de/vipra/cmd/option/ImportCommand.java | 48 +++++++++++-------- .../de/vipra/cmd/text/SpotlightResponse.java | 17 +++++++ vipra-ui/app/html/index.html | 33 +++++++++++-- vipra-ui/app/js/controllers.js | 26 ++++++++-- vipra-ui/app/js/directives.js | 23 +++++++++ vipra-ui/app/less/app.less | 4 ++ vipra-ui/bower.json | 5 +- vipra-ui/gulpfile.js | 8 ++-- .../java/de/vipra/util/model/ArticleFull.java | 16 ++++++- .../java/de/vipra/util/model/TextEntity.java | 32 ++++++++++++- 11 files changed, 193 insertions(+), 41 deletions(-) diff --git a/vipra-backend/src/main/java/de/vipra/rest/resource/SearchResource.java b/vipra-backend/src/main/java/de/vipra/rest/resource/SearchResource.java index 18a39fd0..e2e15af7 100644 --- a/vipra-backend/src/main/java/de/vipra/rest/resource/SearchResource.java +++ b/vipra-backend/src/main/java/de/vipra/rest/resource/SearchResource.java @@ -19,7 +19,9 @@ import javax.ws.rs.core.Response; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.transport.TransportClient; +import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.RangeQueryBuilder; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; @@ -46,7 +48,8 @@ public class SearchResource { @GET @Produces(MediaType.APPLICATION_JSON) public Response doSearch(@QueryParam("topicModel") final String topicModel, @QueryParam("skip") Integer skip, @QueryParam("limit") Integer limit, - @QueryParam("fields") final String fields, @QueryParam("query") final String query) { + @QueryParam("fields") final String fields, @QueryParam("query") final String query, @QueryParam("long") final Long fromDate, + @QueryParam("to") final Long toDate) { final ResponseWrapper<List<ArticleFull>> res = new ResponseWrapper<>(); if (skip == null || skip < 0) @@ -63,11 +66,20 @@ public class SearchResource { indexName = topicModel + "-articles"; SearchResponse response = null; + + QueryBuilder qb = QueryBuilders.multiMatchQuery(query, "topics^" + Constants.ES_BOOST_TOPICS, "title^" + Constants.ES_BOOST_TITLES, "_all"); + + if (fromDate != null || toDate != null) { + final RangeQueryBuilder rqb = QueryBuilders.rangeQuery("date"); + if (fromDate != null) + rqb.from(fromDate); + if (toDate != null) + rqb.to(toDate); + qb = QueryBuilders.boolQuery().must(qb).must(rqb); + } + try { - response = client.prepareSearch(indexName) - .setQuery( - QueryBuilders.multiMatchQuery(query, "topics^" + Constants.ES_BOOST_TOPICS, "title^" + Constants.ES_BOOST_TITLES, "_all")) - .setFrom(skip).setSize(limit).execute().actionGet(); + response = client.prepareSearch(indexName).setQuery(qb).setFrom(skip).setSize(limit).execute().actionGet(); } catch (final Exception e) { e.printStackTrace(); res.addError(new APIError(Response.Status.BAD_REQUEST, "Error", e.getMessage())); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index c6220dcc..7cda33e4 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -6,7 +6,6 @@ import java.io.FileReader; import java.io.FilenameFilter; import java.io.IOException; import java.util.ArrayList; -import java.util.EnumSet; import java.util.List; import org.bson.types.ObjectId; @@ -146,31 +145,37 @@ public class ImportCommand implements Command { private void importArticle(final JSONObject object) { final ArticleFull article = articleFromJSON(object); - if (EnumSet.of(ProcessorMode.ENTITIES, ProcessorMode.TEXT_WITH_ENTITIES).contains(modelConfig.getProcessorMode())) { - try { - final SpotlightResponse spotlightResponse = spotlightAnalyzer.analyze(article.getText()); - - final List<TextEntity> textEntities = new ArrayList<>(spotlightResponse.getResources().size()); - final StringBuilder sb = new StringBuilder(); + try { - for (final SpotlightResource sr : spotlightResponse.getResources()) { - textEntities.add(new TextEntity(sr.getSurfaceForm(), sr.getUri())); + String text = article.getText(); - for (final String type : sr.getTypes()) { - final String[] parts = type.split(":"); - sb.append(" ").append(parts[parts.length - 1]); + if (spotlightAnalyzer != null) { + // extract entities + final SpotlightResponse spotlightResponse = spotlightAnalyzer.analyze(article.getText()); + final List<TextEntity> textEntities = spotlightResponse.getEntities(); + article.setEntities(textEntities); + + // replace/append text with entities in mixed/entities mode + if (modelConfig.getProcessorMode() == ProcessorMode.ENTITIES || modelConfig.getProcessorMode() == ProcessorMode.TEXT_WITH_ENTITIES) { + final StringBuilder sb = new StringBuilder(); + for (final SpotlightResource sr : spotlightResponse.getResources()) { + sb.append(" ").append(sr.getSurfaceForm()); + + for (final String type : sr.getTypes()) { + final String[] parts = type.split(":"); + sb.append(" ").append(parts[parts.length - 1]); + } } - } - // TODO do sth with this - } catch (final IOException e) { - ConsoleUtils.error("could not analyze text with spotlight: " + e.getMessage()); + if (modelConfig.getProcessorMode() == ProcessorMode.ENTITIES) + text = sb.toString().trim(); + else + text += " " + sb.toString(); + } } - } - try { // preprocess text - final ProcessedText processedText = processor.process(modelConfig, article.getText()); + final ProcessedText processedText = processor.process(modelConfig, text); if (processedText.getReducedWordCount() < modelConfig.getDocumentMinimumLength()) { ConsoleUtils.info(" skipped \"" + object.get("title")); @@ -198,6 +203,8 @@ public class ImportCommand implements Command { ConsoleUtils.error("could not save processed article in the database '" + article.getTitle() + "'"); } catch (final FilebaseException e) { ConsoleUtils.error("could not save processed article in the filebase '" + article.getTitle() + "'"); + } catch (IOException e) { + ConsoleUtils.error("io error"); } } @@ -240,7 +247,8 @@ public class ImportCommand implements Command { private void importForModel(final TopicModelConfig modelConfig) throws java.text.ParseException, IOException, ConfigException, ParseException, InterruptedException, DatabaseException { this.modelConfig = modelConfig; - if (this.modelConfig.getProcessorMode() == ProcessorMode.ENTITIES || this.modelConfig.getProcessorMode() == ProcessorMode.TEXT_WITH_ENTITIES) + + if (config.getSpotlightUrl() != null) spotlightAnalyzer = new SpotlightAnalyzer(modelConfig); buffer = new ArticleBuffer(dbArticles); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java index 0cb0ce0c..30832f44 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/SpotlightResponse.java @@ -1,10 +1,16 @@ package de.vipra.cmd.text; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; import java.util.List; +import java.util.Set; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; +import de.vipra.util.model.TextEntity; + @JsonIgnoreProperties(ignoreUnknown = true) public class SpotlightResponse { @@ -19,4 +25,15 @@ public class SpotlightResponse { this.resources = resources; } + public List<TextEntity> getEntities() { + final Set<TextEntity> textEntities = new HashSet<>(resources.size()); + for (SpotlightResource resource : resources) { + textEntities.add(new TextEntity(resource.getSurfaceForm(), resource.getUri())); + // TODO add types to entities? + } + final List<TextEntity> textEntitiesList = new ArrayList<>(textEntities); + Collections.sort(textEntitiesList); + return textEntitiesList; + } + } diff --git a/vipra-ui/app/html/index.html b/vipra-ui/app/html/index.html index 3eddb62d..30b09004 100644 --- a/vipra-ui/app/html/index.html +++ b/vipra-ui/app/html/index.html @@ -27,11 +27,36 @@ </ul> </div> </div> - <div class="row row-spaced"> + <div class="row row-spaced search-row"> <div class="col-md-12"> - <div class="form-group has-feedback"> - <input type="text" class="form-control input-lg" placeholder="Search..." ng-model="search" ng-model-options="{debounce:500}" id="searchBox"> - <i class="form-control-feedback glyphicon glyphicon-search text-muted"></i> + <div class="input-group"> + <div class="form-group has-feedback"> + <input type="text" class="form-control input-lg" placeholder="Search..." ng-model="search" ng-model-options="{debounce:500}" id="searchBox"> + <i class="form-control-feedback glyphicon glyphicon-search text-muted"></i> + </div> + <span class="input-group-btn"> + <button class="btn btn-default btn-lg" type="button" title="Advanced" ng-click="advancedSearch=!advancedSearch"><i class="fa fa-chevron-down text-muted"></i></button> + </span> + </div> + </div> + </div> + <div class="row row-spaced" ng-show="advancedSearch"> + <div class="col-md-6 form-horizontal"> + <label for="advFromDate" class="col-sm-2 control-label">From</label> + <div class="input-group date col-sm-10" id="advFromDate" bs-datetimepicker ng-model="rootModels.advFromDate"> + <input type="text" class="form-control"> + <span class="input-group-addon"> + <span class="glyphicon glyphicon-calendar"></span> + </span> + </div> + </div> + <div class="col-md-6 form-horizontal"> + <label for="advToDate" class="col-sm-2 control-label">To</label> + <div class="input-group date col-sm-10" id="advToDate" bs-datetimepicker ng-model="rootModels.advToDate"> + <input type="text" class="form-control"> + <span class="input-group-addon"> + <span class="glyphicon glyphicon-calendar"></span> + </span> </div> </div> </div> diff --git a/vipra-ui/app/js/controllers.js b/vipra-ui/app/js/controllers.js index 92b9c03c..b24be9ae 100644 --- a/vipra-ui/app/js/controllers.js +++ b/vipra-ui/app/js/controllers.js @@ -17,6 +17,19 @@ search: null }; + var prevTopicModelLoading = false; + if(localStorage.tm) { + prevTopicModelLoading = true + TopicModelFactory.get({ + id: localStorage.tm + }, function(data) { + $scope.rootModels.topicModel = data; + prevTopicModelLoading = false; + }, function() { + prevTopicModelLoading = false; + }) + } + $scope.queryTopicModels = function() { TopicModelFactory.query({ fields: '_all' @@ -26,6 +39,8 @@ }; $scope.chooseTopicModel = function() { + if(prevTopicModelLoading) + return; $scope.queryTopicModels(); $scope.rootModels.topicModelModalOpen = true; $('#topicModelModal').modal(); @@ -38,6 +53,7 @@ $scope.changeTopicModel = function(topicModel) { $scope.rootModels.topicModel = topicModel; $('#topicModelModal').modal('hide'); + localStorage.tm = topicModel.id; }; $scope.menubarSearch = function(query) { @@ -118,8 +134,8 @@ /** * Index controller */ - app.controller('IndexController', ['$scope', '$stateParams', '$location', 'ArticleFactory', 'TopicFactory', 'SearchFactory', - function($scope, $stateParams, $location, ArticleFactory, TopicFactory, SearchFactory) { + app.controller('IndexController', ['$scope', '$stateParams', '$location', '$timeout', 'ArticleFactory', 'TopicFactory', 'SearchFactory', + function($scope, $stateParams, $location, $timeout, ArticleFactory, TopicFactory, SearchFactory) { // page was reloaded, choose topic model if (!$scope.rootModels.topicModel) @@ -147,7 +163,7 @@ }); }); - $scope.$watchGroup(['search', 'rootModels.topicModel'], function() { + $scope.$watchGroup(['search', 'rootModels.topicModel', 'rootModels.advFromDate', 'rootModels.advToDate'], function() { if ($scope.search && $scope.rootModels.topicModel) { $location.search('q', $scope.search); $scope.goSearch(); @@ -163,7 +179,9 @@ SearchFactory.query({ topicModel: $scope.rootModels.topicModel.id, limit: 10, - query: $scope.search + query: $scope.search, + from: $scope.rootModels.advFromDate ? $scope.rootModels.advFromDate.getTime() : null, + to: $scope.rootModels.advToDate ? $scope.rootModels.advToDate.getTime() : null }, function(data) { $scope.searching = false; $scope.searchResults = data; diff --git a/vipra-ui/app/js/directives.js b/vipra-ui/app/js/directives.js index 68ff1678..ef7b1f8c 100644 --- a/vipra-ui/app/js/directives.js +++ b/vipra-ui/app/js/directives.js @@ -172,6 +172,29 @@ } }]); + app.directive('bsDatetimepicker', [function() { + return { + scope: { + ngModel: '=' + }, + link: function($scope, $elem) { + $elem.datetimepicker({ + sideBySide: true, + calendarWeeks: true, + showTodayButton: true, + showClear: true, + toolbarPlacement: 'top', + useCurrent: false + }); + $elem.on('dp.change', function(e) { + $scope.$apply(function() { + $scope.ngModel = e.date.toDate(); + }); + }); + } + } + }]); + app.directive('sequenceDropdown', [function() { return { scope: { diff --git a/vipra-ui/app/less/app.less b/vipra-ui/app/less/app.less index 7201c211..ef6b0f4a 100644 --- a/vipra-ui/app/less/app.less +++ b/vipra-ui/app/less/app.less @@ -428,6 +428,10 @@ word-menu { border-top: 1px solid #e5e5e5; } +.search-row { + margin-top: 35px; +} + @-moz-keyframes spin { 100% { -moz-transform: rotateY(360deg); diff --git a/vipra-ui/bower.json b/vipra-ui/bower.json index 11a5e687..1f12c078 100644 --- a/vipra-ui/bower.json +++ b/vipra-ui/bower.json @@ -31,6 +31,7 @@ "awesome-bootstrap-checkbox": "^0.x", "randomcolor": "randomColor#^0.x", "bootbox.js": "bootbox#^4.x", - "angular-hotkeys": "chieffancypants/angular-hotkeys#^1.x" + "angular-hotkeys": "chieffancypants/angular-hotkeys#^1.x", + "eonasdan-bootstrap-datetimepicker": "^4.17.37" } -} \ No newline at end of file +} diff --git a/vipra-ui/gulpfile.js b/vipra-ui/gulpfile.js index 5ad351f6..e6be12b1 100644 --- a/vipra-ui/gulpfile.js +++ b/vipra-ui/gulpfile.js @@ -20,10 +20,11 @@ var assets = { 'bower_components/bootstrap/dist/js/bootstrap.min.js', 'bower_components/highcharts/highstock.js', 'bower_components/vis/dist/vis.min.js', - 'bower_components/moment/min/moment.min.js', + 'bower_components/moment/min/moment-with-locales.min.js', 'bower_components/nya-bootstrap-select/dist/js/nya-bs-select.min.js', 'bower_components/randomcolor/randomColor.js', - 'bower_components/bootbox.js/bootbox.js' + 'bower_components/bootbox.js/bootbox.js', + 'bower_components/eonasdan-bootstrap-datetimepicker/build/js/bootstrap-datetimepicker.min.js' ], css: [ 'bower_components/bootstrap/dist/css/bootstrap.min.css', @@ -31,7 +32,8 @@ var assets = { 'bower_components/vis/dist/vis.min.css', 'bower_components/nya-bootstrap-select/dist/css/nya-bs-select.min.css', 'bower_components/awesome-bootstrap-checkbox/awesome-bootstrap-checkbox.css', - 'bower_components/angular-hotkeys/build/hotkeys.min.css' + 'bower_components/angular-hotkeys/build/hotkeys.min.css', + 'bower_components/eonasdan-bootstrap-datetimepicker/build/css/bootstrap-datetimepicker.min.css' ], fonts: [ 'bower_components/bootstrap/dist/fonts/*', diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java index 3feb126a..2053f17d 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleFull.java @@ -62,7 +62,7 @@ public class ArticleFull implements Model<ObjectId>, Serializable { @QueryIgnore(multi = true) private List<TopicShare> topics; - private int topicsCount; + private Integer topicsCount; @Embedded @QueryIgnore(multi = true) @@ -72,6 +72,10 @@ public class ArticleFull implements Model<ObjectId>, Serializable { @QueryIgnore(all = true) private List<ArticleWord> words; + @Embedded + @QueryIgnore(all = true) + private List<TextEntity> entities; + @Embedded @QueryIgnore(multi = true) private ArticleStats stats; @@ -178,7 +182,7 @@ public class ArticleFull implements Model<ObjectId>, Serializable { topicsCount = topics == null ? 0 : topics.size(); } - public int getTopicsCount() { + public Integer getTopicsCount() { return topicsCount; } @@ -210,6 +214,14 @@ public class ArticleFull implements Model<ObjectId>, Serializable { this.words = words; } + public List<TextEntity> getEntities() { + return entities; + } + + public void setEntities(List<TextEntity> entities) { + this.entities = entities; + } + public ArticleStats getStats() { return stats; } diff --git a/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java b/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java index 49dfe53d..e66fa89f 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java @@ -9,7 +9,7 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @JsonIgnoreProperties(ignoreUnknown = true) @SuppressWarnings("serial") @Embedded -public class TextEntity implements Serializable { +public class TextEntity implements Comparable<TextEntity>, Serializable { private String entity; @@ -38,4 +38,34 @@ public class TextEntity implements Serializable { this.url = url; } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((entity == null) ? 0 : entity.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + TextEntity other = (TextEntity) obj; + if (entity == null) { + if (other.entity != null) + return false; + } else if (!entity.equals(other.entity)) + return false; + return true; + } + + @Override + public int compareTo(TextEntity o) { + return entity.compareTo(o.getEntity()); + } + } -- GitLab