diff --git a/HypernymReader/pom.xml b/HypernymReader/pom.xml index 0e6866a3dfdc249fbfb4135782f8d6ccd9032f66..62b6fdcb9123a7c0ca449205692f892c994c64ed 100644 --- a/HypernymReader/pom.xml +++ b/HypernymReader/pom.xml @@ -14,5 +14,17 @@ <artifactId>commons-lang3</artifactId> <version>3.4</version> </dependency> + <dependency> + <groupId>commons-io</groupId> + <artifactId>commons-io</artifactId> + <version>2.4</version> + </dependency> + + <!-- De-/serialization --> + <dependency> + <groupId>de.ruedigermoeller</groupId> + <artifactId>fst</artifactId> + <version>1.63</version> + </dependency> </dependencies> </project> \ No newline at end of file diff --git a/HypernymReader/src/de/vipra/hypernym/HypernymReader.java b/HypernymReader/src/de/vipra/hypernym/HypernymReader.java index fc7f68d32b9741e7eddb03d17c8bcdf2340b5f18..4e48007d47e1a4028cd5c09f719426294888a178 100644 --- a/HypernymReader/src/de/vipra/hypernym/HypernymReader.java +++ b/HypernymReader/src/de/vipra/hypernym/HypernymReader.java @@ -1,27 +1,26 @@ package de.vipra.hypernym; import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.io.ObjectOutputStream; +import java.io.Serializable; import java.util.HashMap; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.zip.GZIPOutputStream; - +import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringEscapeUtils; +import de.ruedigermoeller.serialization.FSTConfiguration; + public class HypernymReader { public static final String IN_FILE_PATH = "/home/eike/Downloads/en.lhd.extension.2015-10.nt"; - public static final String OUT_FILE_PATH = "/home/eike/Downloads/hypernyms.ser.gz"; + public static final String OUT_FILE_PATH = "/home/eike/Downloads/hypernyms.ser"; public static final Pattern LINE_PATTERN = Pattern .compile("<http://dbpedia.org/resource/([^>]+)>\\s+<([^>]+)>\\s+<http://dbpedia.org/resource/([^>]+)>"); @@ -56,11 +55,9 @@ public class HypernymReader { in.close(); - // write map to file - ObjectOutputStream out = new ObjectOutputStream( - new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(OUT_FILE_PATH, false)))); - out.writeObject(hypernyms); - out.close(); + FSTConfiguration conf = FSTConfiguration.createDefaultConfiguration(); + byte barray[] = conf.asByteArray((Serializable) hypernyms); + FileUtils.writeByteArrayToFile(new File(OUT_FILE_PATH), barray); } public static int countLines(final File file) throws IOException { diff --git a/vipra-cmd/pom.xml b/vipra-cmd/pom.xml index 02a54aece9e21ddcc223a78999682fa09ce06bca..88564b13eff169388dc7369363b731278ed3a90c 100644 --- a/vipra-cmd/pom.xml +++ b/vipra-cmd/pom.xml @@ -76,6 +76,13 @@ <version>1.9</version> </dependency> + <!-- De-/serialization --> + <dependency> + <groupId>de.ruedigermoeller</groupId> + <artifactId>fst</artifactId> + <version>1.63</version> + </dependency> + <!-- Workspace --> <dependency> <groupId>de.vipra</groupId> diff --git a/vipra-cmd/runcfg/CMD.launch b/vipra-cmd/runcfg/CMD.launch index 438bf4812b81a5d5621f47a5c907bae847a18adc..a1304e44658a071bd2fb66e9e6be8da85b28959e 100644 --- a/vipra-cmd/runcfg/CMD.launch +++ b/vipra-cmd/runcfg/CMD.launch @@ -11,7 +11,7 @@ </listAttribute> <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/> <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="de.vipra.cmd.Main"/> -<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-eS test -CE"/> +<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="-S test -I /home/eike/repos/master/ma-impl/docker/data/test-1.json"/> <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="vipra-cmd"/> <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/> <stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-ea"/> diff --git a/vipra-cmd/src/main/hypernyms/hypernyms.ser b/vipra-cmd/src/main/hypernyms/hypernyms.ser new file mode 100644 index 0000000000000000000000000000000000000000..f9d67b229ecf24697f656133cf7f300a338e84f7 Binary files /dev/null and b/vipra-cmd/src/main/hypernyms/hypernyms.ser differ diff --git a/vipra-cmd/src/main/hypernyms/hypernyms.ser.gz b/vipra-cmd/src/main/hypernyms/hypernyms.ser.gz deleted file mode 100644 index b21d7089a99cdd1c9a80749854ac815157c3019e..0000000000000000000000000000000000000000 Binary files a/vipra-cmd/src/main/hypernyms/hypernyms.ser.gz and /dev/null differ diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java index cdfcb282d6d3d9780c4a34cfc349d25c6aeaf94c..bfe00e6144c488333b0c151fc8cc93982c8ca042 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/Filebase.java @@ -68,19 +68,19 @@ public class Filebase { if (entry.isNew()) { final ArticleFull newArticle = newArticles.get(entry.getId()); if (modelConfig.isProcessorUseText()) - outModel.write(wordIndex.transform(newArticle.getProcessedText(), true)); + outModel.write(wordIndex.transform(newArticle.getProcessedText())); outModel.write(" "); if (modelConfig.isProcessorUseEntities()) - outModel.write(wordIndex.transform(newArticle.entities(), false)); + outModel.write(wordIndex.transform(newArticle.entities())); else if (modelConfig.isProcessorUseHypernyms()) - outModel.write(wordIndex.transform(newArticle.hypernyms(), false)); + outModel.write(wordIndex.transform(newArticle.hypernyms())); outModel.write(" "); if (modelConfig.isProcessorUseEntityTypes()) - outModel.write(wordIndex.transform(newArticle.types(), false)); + outModel.write(wordIndex.transform(newArticle.types())); outModel.write(Constants.LINE_SEP); } else { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWordIndex.java b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWordIndex.java index e035f3ccf988fa6df2225032b7608075fe76aaee..332a8b1e666fa698082ec00fcb2693bd9c0b53c1 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWordIndex.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/file/FilebaseWordIndex.java @@ -13,6 +13,7 @@ import java.util.Set; import de.vipra.util.CountMap; import de.vipra.util.FileUtils; +import de.vipra.util.StringUtils; public class FilebaseWordIndex implements Iterable<String> { @@ -50,13 +51,13 @@ public class FilebaseWordIndex implements Iterable<String> { dirty = false; } - public String transform(final String[] words, final boolean dbInsert) { + public String transform(final String[] words) { final CountMap<String> countMap = new CountMap<>(); for (final String word : words) { if (word != null && !word.trim().isEmpty()) { countMap.count(word.trim()); - if (dbInsert) - newWords.add(word); + if (StringUtils.isWord(word)) + newWords.add(word.toLowerCase().trim()); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/BackupCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/BackupCommand.java index 57e6875873b2408ac3ca376863776f40de999d08..d7981dd179a8e2206a3674952cc95ad8b1ad54a5 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/BackupCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/BackupCommand.java @@ -10,6 +10,8 @@ import org.zeroturnaround.zip.ZipUtil; import de.vipra.util.Config; import de.vipra.util.ConsoleUtils; import de.vipra.util.FileUtils; +import de.vipra.util.StringUtils; +import de.vipra.util.Timer; public class BackupCommand implements Command { @@ -22,25 +24,28 @@ public class BackupCommand implements Command { @Override public void run() throws Exception { try { + final Timer timer = new Timer(); + ConsoleUtils.info("creating backup"); + final Config config = Config.getConfig(); final File tmpTarget = FileUtils.getTempFile("vipra-dump"); org.apache.commons.io.FileUtils.deleteDirectory(tmpTarget); - ConsoleUtils.infoNOLF(" backup database..."); + ConsoleUtils.infoNOLF(" " + ConsoleUtils.PATH_T + " backup database..."); final Process p = Runtime.getRuntime().exec("mongodump -d " + config.getDatabaseName() + " -h " + config.getDatabaseHost() + " --port " + config.getDatabasePort() + " -o " + new File(tmpTarget, "db")); p.waitFor(); ConsoleUtils.print(Ansi.ansi().fg(Color.GREEN).a("OK").reset().toString()); - ConsoleUtils.infoNOLF(" backup filebase..."); + ConsoleUtils.infoNOLF(" " + ConsoleUtils.PATH_T + " backup filebase..."); org.apache.commons.io.FileUtils.copyDirectory(config.getDataDirectory(), new File(tmpTarget, "fb")); ConsoleUtils.print(Ansi.ansi().fg(Color.GREEN).a("OK").reset().toString()); - ConsoleUtils.infoNOLF(" backup configuration..."); + ConsoleUtils.infoNOLF(" " + ConsoleUtils.PATH_T + " backup configuration..."); org.apache.commons.io.FileUtils.copyDirectory(Config.getGenericConfigDir(), new File(tmpTarget, "config")); ConsoleUtils.print(Ansi.ansi().fg(Color.GREEN).a("OK").reset().toString()); - ConsoleUtils.infoNOLF(" compressing..."); + ConsoleUtils.infoNOLF(" " + ConsoleUtils.PATH_L + " compressing..."); File target = new File(path); if (target.exists() && target.isDirectory()) target = new File(target, "vipra-" + new Date().getTime() + ".zip"); @@ -48,7 +53,8 @@ public class BackupCommand implements Command { org.apache.commons.io.FileUtils.deleteDirectory(tmpTarget); ConsoleUtils.print(Ansi.ansi().fg(Color.GREEN).a("OK").reset().toString()); - ConsoleUtils.info("completed: " + target.getAbsolutePath()); + ConsoleUtils.info("saved to file: " + target.getAbsolutePath()); + ConsoleUtils.info("done in " + StringUtils.timeString(timer.total())); } catch (final Exception e) { ConsoleUtils.print(Ansi.ansi().fg(Color.RED).a("FAILED").reset().toString()); if (e.getMessage().contains("mongodump")) { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java index a96b0e20dd72e1a45a975b5bc544efb34c57f82b..31d64c373c45e7ce78b5eb4a0cbf775e376d9423 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/EditModelCommand.java @@ -64,11 +64,13 @@ public class EditModelCommand implements Command { topicModelConfig .setProcessorUseEntityTypes(ConsoleUtils.readBoolean("processor use entity types", topicModelConfig.isProcessorUseEntityTypes())); topicModelConfig.setProcessorUseHypernyms(ConsoleUtils.readBoolean("processor use hypernyms", topicModelConfig.isProcessorUseHypernyms())); + topicModelConfig.setQueryEntityDescriptions(ConsoleUtils.readBoolean("query entity types", topicModelConfig.isQueryEntityDescriptions())); topicModelConfig .setWindowResolution(ConsoleUtils.readEnum(WindowResolution.class, "window resolution", topicModelConfig.getWindowResolution())); dbTopicModels.updateSingle(topicModel, "modelConfig"); topicModelConfig.saveToFile(topicModelConfig.getModelDir(config.getDataDirectory())); + config.setTopicModelConfig(topicModelConfig); } @Override diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/EraseCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/EraseCommand.java index 29ffaed73d7742e9b3ab6585a6d4d4d0b23da119..edde5a1492331d838f408c9e5a2cec98f934d1a9 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/EraseCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/EraseCommand.java @@ -17,6 +17,8 @@ public class EraseCommand implements Command { private Client elasticClient; private void clear() throws Exception { + ConsoleUtils.info("erasing database"); + config = Config.getConfig(); elasticClient = ESClient.getClient(config); MongoService.dropDatabase(config); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java index 1ac3556a896244da31a2ccc87d45cd3dbb68709d..825364be0e87816a8c48fcba57e5a77fa113e79f 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ImportCommand.java @@ -9,7 +9,6 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.regex.Matcher; import java.util.regex.Pattern; import org.bson.types.ObjectId; @@ -22,6 +21,7 @@ import de.vipra.cmd.file.Filebase; import de.vipra.cmd.file.FilebaseException; import de.vipra.cmd.file.FilebaseWindowIndex; import de.vipra.cmd.file.FilebaseWordIndex; +import de.vipra.cmd.text.DBPediaAnalyzer; import de.vipra.cmd.text.HypernymAnalyzer; import de.vipra.cmd.text.ProcessedText; import de.vipra.cmd.text.Processor; @@ -31,12 +31,14 @@ import de.vipra.cmd.text.SpotlightResponse; import de.vipra.util.Config; import de.vipra.util.ConsoleUtils; import de.vipra.util.Constants; +import de.vipra.util.CountMap; import de.vipra.util.StringUtils; import de.vipra.util.Timer; import de.vipra.util.ex.ConfigException; import de.vipra.util.ex.DatabaseException; import de.vipra.util.model.ArticleFull; import de.vipra.util.model.ArticleStats; +import de.vipra.util.model.ArticleWord; import de.vipra.util.model.TextEntity; import de.vipra.util.model.TextEntityCount; import de.vipra.util.model.TextEntityFull; @@ -169,12 +171,15 @@ public class ImportCommand implements Command { ConsoleUtils.info(ConsoleUtils.positionString(current, max) + " skipped \"" + object.get("title") + "\" (" + processedText.getReducedWordCount() + ")"); } else { + Set<String> blockedWords = new HashSet<>(0); + final CountMap<String> wordCounts = processedText.getWordCounts(); + final List<ArticleWord> articleWords = processedText.getArticleWords(); + String articleText = article.getText(); + // spotlight analysis if (spotlightAnalyzer != null) { final SpotlightResponse spotlightResponse = spotlightAnalyzer.analyze(article.getText()); - String articleText = article.getText(); - final List<TextEntityCount> textEntitiesCounts = spotlightResponse.getEntities(); if (textEntitiesCounts != null) { // replace entities with hypernyms @@ -190,6 +195,8 @@ public class ImportCommand implements Command { } } + blockedWords = new HashSet<>(textEntitiesCounts.size()); + // insert entities into text for (final TextEntityCount textEntityCount : textEntitiesCounts) { // get new text entity @@ -197,29 +204,41 @@ public class ImportCommand implements Command { if (newTextEntity.getEntity() == null || newTextEntity.getEntity().isEmpty()) continue; - // get descriptions from dbpedia - // newTextEntity.setDescription(DBPediaAnalyzer.getAbstract(newTextEntity.getUrl())); - newTextEntities.add(newTextEntity); // insert entity into text articleText = articleText.replaceAll( - "(?i)\\b" + Pattern.quote(textEntityCount.getEntity().getEntity()) + "\\b(?![^<]*>|[^<>]*</)", - Matcher.quoteReplacement(newTextEntity.aTag(textEntityCount.getEntity().getEntity()))); + "(?i)\\b(" + Pattern.quote(textEntityCount.getEntity().getEntity()) + ")\\b(?![^<]*>|[^<>]*</)", + newTextEntity.aTag("$1")); + + // set to ignore entity later when replacing words + blockedWords.add(textEntityCount.getEntity().getEntity().toLowerCase()); // replace entity surface form by resource form textEntityCount.getEntity().setEntity(newTextEntity.getEntity()); + + // add entities as words if not already words + if (newTextEntity.getIsWord() && !wordCounts.contains(newTextEntity.getEntity())) { + final ArticleWord newArticleWord = new ArticleWord(newTextEntity.getEntity(), textEntityCount.getCount()); + articleWords.add(newArticleWord); + } } article.setEntities(textEntitiesCounts); - article.setText(articleText); } } + for (final ArticleWord word : processedText.getArticleWords()) { + if (blockedWords.contains(word.getWord())) + continue; + articleText = articleText.replaceAll("(?i)\\b(" + word.getWord() + ")\\b(?![^<]*>|[^<>]*</)", word.aTag("$1")); + } + article.setProcessedText(processedText.getWords()); - article.setWords(processedText.getArticleWords()); + article.setWords(articleWords); article.setTopicModel(new TopicModel(topicModelFull.getId())); article.setWindow(windowIndex.getWindow(article.getDate())); + article.setText(articleText); // generate article stats final ArticleStats stats = new ArticleStats(); @@ -241,9 +260,9 @@ public class ImportCommand implements Command { } catch (final FilebaseException e) { ConsoleUtils.error("could not save processed article in the filebase '" + article.getTitle() + "'"); } catch (final IOException e) { - ConsoleUtils.error("io error"); + ConsoleUtils.error("io error: " + e.getMessage()); } catch (final ClassNotFoundException e) { - ConsoleUtils.error("could not initialize hypernym analyzer"); + ConsoleUtils.error("error: " + e.getMessage()); } } @@ -332,11 +351,6 @@ public class ImportCommand implements Command { } dbWords.createMultiple(newWords); - /* - * add new entities - */ - dbEntities.createMultiple(newTextEntities); - /* * add new windows */ @@ -348,6 +362,22 @@ public class ImportCommand implements Command { } dbWindows.createMultiple(newWindows); + /* + * add new entities + */ + if (modelConfig.isQueryEntityDescriptions()) { + ConsoleUtils.info("querying descriptions of new entities"); + for (final TextEntityFull textEntity : newTextEntities) { + try { + // get description from dbpedia + textEntity.setDescription(DBPediaAnalyzer.getAbstract(textEntity.getUrl(), true)); + } catch (final Exception e) { + ConsoleUtils.error("could not query description for entity: '" + textEntity.getEntity() + "'"); + } + } + } + dbEntities.createMultiple(newTextEntities); + /* * update topic model */ @@ -360,7 +390,7 @@ public class ImportCommand implements Command { /* * run information */ - ConsoleUtils.info(" done in " + StringUtils.timeString(timer.total())); + ConsoleUtils.info("done in " + StringUtils.timeString(timer.total())); } @Override diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/IndexingCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/IndexingCommand.java index 7c7b759974551790b0451379ccf5a6455fb3edb6..f929eb0c538b117b104e4562cf1ba2f5c2354402 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/IndexingCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/IndexingCommand.java @@ -78,7 +78,7 @@ public class IndexingCommand implements Command { dbTopicModels.updateSingle(topicModel, "lastIndexed"); // run information - ConsoleUtils.info(" done in " + StringUtils.timeString(timer.total())); + ConsoleUtils.info("done in " + StringUtils.timeString(timer.total())); } @Override diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java index ce172115a47c5c350d38ce8a3e3a9fc8d90b4137..8340d85fcddd1fa30f53e97bce23ffa953851bb0 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/ModelingCommand.java @@ -47,7 +47,7 @@ public class ModelingCommand implements Command { /* * run information */ - ConsoleUtils.info(" done in " + StringUtils.timeString(timer.total())); + ConsoleUtils.info("done in " + StringUtils.timeString(timer.total())); } @Override diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/RestoreCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/RestoreCommand.java index c1bc3024516bcc7c4c3d87290044d67644a0afd8..20371031510ae095e9609ea27f5785a6d02ab5eb 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/RestoreCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/RestoreCommand.java @@ -10,6 +10,8 @@ import org.zeroturnaround.zip.ZipUtil; import de.vipra.util.Config; import de.vipra.util.ConsoleUtils; import de.vipra.util.FileUtils; +import de.vipra.util.StringUtils; +import de.vipra.util.Timer; public class RestoreCommand implements Command { @@ -22,6 +24,9 @@ public class RestoreCommand implements Command { @Override public void run() throws Exception { try { + final Timer timer = new Timer(); + ConsoleUtils.info("restoring from file"); + final File zip = new File(path); if (!zip.isFile()) throw new FileNotFoundException(path); @@ -29,19 +34,24 @@ public class RestoreCommand implements Command { ZipUtil.unpack(zip, tmpTarget); final Config config = Config.getConfig(); - ConsoleUtils.infoNOLF(" restore database..."); + ConsoleUtils.infoNOLF(" " + ConsoleUtils.PATH_T + " restore database..."); final Process p = Runtime.getRuntime().exec( "mongorestore --drop -h " + config.getDatabaseHost() + " --port " + config.getDatabasePort() + " " + new File(tmpTarget, "db")); p.waitFor(); ConsoleUtils.print(Ansi.ansi().fg(Color.GREEN).a("OK").reset().toString()); - ConsoleUtils.infoNOLF(" restore filebase..."); + ConsoleUtils.infoNOLF(" " + ConsoleUtils.PATH_T + " restore filebase..."); org.apache.commons.io.FileUtils.copyDirectory(new File(tmpTarget, "fb"), config.getDataDirectory()); ConsoleUtils.print(Ansi.ansi().fg(Color.GREEN).a("OK").reset().toString()); - ConsoleUtils.infoNOLF(" restore configuration..."); + ConsoleUtils.infoNOLF(" " + ConsoleUtils.PATH_L + " restore configuration..."); org.apache.commons.io.FileUtils.copyDirectory(new File(tmpTarget, "config"), Config.getGenericConfigDir()); ConsoleUtils.print(Ansi.ansi().fg(Color.GREEN).a("OK").reset().toString()); + + org.apache.commons.io.FileUtils.deleteDirectory(tmpTarget); + + ConsoleUtils.info("restored"); + ConsoleUtils.info("done in " + StringUtils.timeString(timer.total())); } catch (final Exception e) { ConsoleUtils.print(Ansi.ansi().fg(Color.RED).a("FAILED").reset().toString()); if (e.getMessage().contains("mongorestore")) { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/TestCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/TestCommand.java index b0aa02e8761d704bc6549b24410a81a37bdfd9f5..04fef5075bc43608f5a4a93ed94c0ef69d22394b 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/TestCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/TestCommand.java @@ -22,13 +22,15 @@ public class TestCommand implements Command { @Override public void run() throws Exception { try { + ConsoleUtils.info("testing system"); + // test if configuration readable - ConsoleUtils.infoNOLF("reading configuration..."); + ConsoleUtils.infoNOLF(" " + ConsoleUtils.PATH_T + " reading configuration..."); final Config config = Config.getConfig(true); ConsoleUtils.print(Ansi.ansi().fg(Color.GREEN).a("OK").reset().toString()); // test if dtm is accessible - ConsoleUtils.infoNOLF("testing dtm binary..."); + ConsoleUtils.infoNOLF(" " + ConsoleUtils.PATH_T + " testing dtm binary..."); if (config.getDtmPath() == null || config.getDtmPath().isEmpty()) throw new Exception("dtm binary not configured, set 'dtmPath' in config.json"); final File dtm = new File(config.getDtmPath()); @@ -37,13 +39,14 @@ public class TestCommand implements Command { ConsoleUtils.print(Ansi.ansi().fg(Color.GREEN).a("OK").reset().toString()); // test if database is accessible - ConsoleUtils.infoNOLF("testing mongodb connection..."); + ConsoleUtils.infoNOLF(" " + ConsoleUtils.PATH_T + " testing mongodb connection..."); final MongoService<Article, ObjectId> dbArticles = MongoService.getDatabaseService(config, Article.class); dbArticles.count(null); ConsoleUtils.print(Ansi.ansi().fg(Color.GREEN).a("OK").reset().toString()); // test if elasticsearch is accessible - ConsoleUtils.infoNOLF("testing elasticsearch connection..."); + ConsoleUtils.infoNOLF( + " " + (config.isSpotlightEnabled() ? ConsoleUtils.PATH_T : ConsoleUtils.PATH_L) + " testing elasticsearch connection..."); final TransportClient esclient = ESClient.getClient(config); if (esclient.connectedNodes().isEmpty()) throw new NoNodeAvailableException("no elasticsearch nodes available"); @@ -51,7 +54,7 @@ public class TestCommand implements Command { // test if spotlight is accessible if (config.isSpotlightEnabled()) { - ConsoleUtils.infoNOLF("testing spotlight connection..."); + ConsoleUtils.infoNOLF(" " + ConsoleUtils.PATH_L + " testing spotlight connection..."); final URL url = new URL(config.getSpotlightUrl() + "/rest/application.wadl"); final HttpURLConnection huc = (HttpURLConnection) url.openConnection(); huc.setRequestMethod("HEAD"); diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/option/VersionCommand.java b/vipra-cmd/src/main/java/de/vipra/cmd/option/VersionCommand.java index 5ecd8fc120a75a853d69c7295f8fb8891fb0641f..44bddcac6928d0a977f358910987ff8f6f50d638 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/option/VersionCommand.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/option/VersionCommand.java @@ -16,9 +16,9 @@ public class VersionCommand implements Command { props.load(in); ConsoleUtils.info("VIPRA CMD tool"); - ConsoleUtils.info("Version: " + props.getProperty("git-tag")); - ConsoleUtils.info("Commit : " + props.getProperty("git-sha-1")); - ConsoleUtils.info("From : " + props.getProperty("builddate")); + ConsoleUtils.info(" " + ConsoleUtils.PATH_T + " Version: " + props.getProperty("git-tag")); + ConsoleUtils.info(" " + ConsoleUtils.PATH_T + " Commit : " + props.getProperty("git-sha-1")); + ConsoleUtils.info(" " + ConsoleUtils.PATH_L + " From : " + props.getProperty("builddate")); } @Override diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/DBPediaAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/DBPediaAnalyzer.java index 604e26d26be50518bfb1e952bd02840e5e774977..b3c908448e6069dd1fbc527b8aa4b4275bb3365e 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/DBPediaAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/DBPediaAnalyzer.java @@ -17,46 +17,53 @@ public class DBPediaAnalyzer { public static final String DBPEDIA_SPARQL_ENDPOINT = "http://dbpedia.org/sparql"; @SuppressWarnings({ "unchecked", "rawtypes" }) - public static String getAbstract(final String resourceURL) throws IOException { + public static String getAbstract(final String resourceURL, final boolean retry) throws Exception { final String query = "select ?abstract where {<" + resourceURL + "> <http://dbpedia.org/ontology/abstract> ?abstract filter(langMatches(lang(?abstract),\"en\"))}"; - final String strUrl = DBPEDIA_SPARQL_ENDPOINT + "?default-graph-uri=" + URLEncoder.encode("http://dbpedia.org", "UTF-8") - + "&format=application%2Fsparql-results%2Bjson&CXML_redir_for_subjs=121&CXML_redir_for_hrefs=&timeout=30000&debug=on&query=" - + URLEncoder.encode(query, "UTF-8"); - - final URL url = new URL(strUrl); - final String result = IOUtils.toString(url.openStream()); - - if (result != null) { - final ObjectMapper mapper = new ObjectMapper(); - final Map<String, Object> map = mapper.readValue(result, new TypeReference<HashMap>() {}); - - Object o = map.get("results"); - if (o != null && o instanceof Map) { - o = ((Map<String, Object>) o).get("bindings"); - if (o != null && o instanceof List) { - final List<Object> l = (List<Object>) o; - for (int i = 0; i < l.size(); i++) { - o = l.get(i); - if (o != null && o instanceof Map) { - o = ((Map<String, Object>) o).get("abstract"); - if (o != null && o instanceof Map) { - o = ((Map<String, Object>) o).get("value"); - if (o != null && o instanceof String) - return (String) o; + + int retries = 0; + while (++retries <= 2) { + try { + final String strUrl = DBPEDIA_SPARQL_ENDPOINT + "?default-graph-uri=" + URLEncoder.encode("http://dbpedia.org", "UTF-8") + + "&format=application%2Fsparql-results%2Bjson&CXML_redir_for_subjs=121&CXML_redir_for_hrefs=&timeout=30000&debug=on&query=" + + URLEncoder.encode(query, "UTF-8"); + + final URL url = new URL(strUrl); + final String result = IOUtils.toString(url.openStream()); + + if (result != null) { + final ObjectMapper mapper = new ObjectMapper(); + final Map<String, Object> map = mapper.readValue(result, new TypeReference<HashMap>() {}); + + Object o = map.get("results"); + if (o != null && o instanceof Map) { + o = ((Map<String, Object>) o).get("bindings"); + if (o != null && o instanceof List) { + final List<Object> l = (List<Object>) o; + for (int i = 0; i < l.size(); i++) { + o = l.get(i); + if (o != null && o instanceof Map) { + o = ((Map<String, Object>) o).get("abstract"); + if (o != null && o instanceof Map) { + o = ((Map<String, Object>) o).get("value"); + if (o != null && o instanceof String) + return (String) o; + } + } } } } } + return null; + } catch (final IOException e) { + try { + Thread.sleep(100); + } catch (final InterruptedException e1) {} } } - return null; - } - - public static void main(final String[] args) throws IOException { - getAbstract("http://dbpedia.org/resource/Actor"); + throw new Exception("dbpedia query error"); } } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/HypernymAnalyzer.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/HypernymAnalyzer.java index 4f9f56421ae44ada493d6e434cb4a367af4a4614..0be0924ccdd3d62ea5e480c307f3788754629770 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/HypernymAnalyzer.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/HypernymAnalyzer.java @@ -1,14 +1,13 @@ package de.vipra.cmd.text; -import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; -import java.io.ObjectInputStream; import java.util.Map; -import java.util.zip.GZIPInputStream; import org.apache.commons.lang3.StringEscapeUtils; +import org.zeroturnaround.zip.commons.IOUtils; +import de.ruedigermoeller.serialization.FSTConfiguration; import de.vipra.util.FileUtils; public class HypernymAnalyzer { @@ -19,10 +18,10 @@ public class HypernymAnalyzer { @SuppressWarnings("unchecked") private HypernymAnalyzer() throws IOException, ClassNotFoundException { - final InputStream in = FileUtils.getResource("hypernyms.ser.gz"); - final ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(new GZIPInputStream(in))); - hypernyms = (Map<String, String>) ois.readObject(); - ois.close(); + final InputStream in = FileUtils.getResource("hypernyms.ser"); + final byte[] barray = IOUtils.toByteArray(in); + final FSTConfiguration conf = FSTConfiguration.createDefaultConfiguration(); + hypernyms = (Map<String, String>) conf.asObject(barray); } public boolean containsEntity(final String entity) { diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java index ed941544596b403e8b34b2bd401f2e857e1ca1a8..e6c2eea2be4966c0aa15ea044e16fa282e021444 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/ProcessedText.java @@ -15,6 +15,7 @@ public class ProcessedText { private final long originalWordCount; private final long reducedWordCount; private final double reductionRatio; + private final CountMap<String> wordCounts; private final List<ArticleWord> articleWords; public ProcessedText(final String text, final long wordCount) { @@ -28,9 +29,10 @@ public class ProcessedText { reducedWordCount = words.length; reductionRatio = 1 - ((double) reducedWordCount / wordCount); - final CountMap<String> wordCounts = new CountMap<>(); + wordCounts = new CountMap<>(); for (final String word : words) wordCounts.count(word); + final List<ArticleWord> articleWords = new ArrayList<>(wordCounts.size()); for (final Entry<String, Integer> entry : wordCounts.entrySet()) articleWords.add(new ArticleWord(entry.getKey(), entry.getValue())); @@ -58,4 +60,8 @@ public class ProcessedText { return articleWords; } + public CountMap<String> getWordCounts() { + return wordCounts; + } + } diff --git a/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java b/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java index 0157f41fd941c92e7b6d3285b3197a26d32731c3..b977de033e3a2a91e5b24d0993f1e359158c07f9 100644 --- a/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java +++ b/vipra-cmd/src/main/java/de/vipra/cmd/text/StopwordsAnnotator.java @@ -27,7 +27,10 @@ public class StopwordsAnnotator implements Annotator, CoreAnnotation<Boolean> { @Override public void annotate(final Annotation annotation) { final List<CoreLabel> tokens = annotation.get(TokensAnnotation.class); - tokens.stream().filter(t -> stopWords.contains(t.word())).forEach(t -> t.set(StopwordsAnnotator.class, true)); + for (final CoreLabel token : tokens) { + if (stopWords.contains(token.word().toLowerCase().trim())) + token.set(StopwordsAnnotator.class, true); + } } @Override diff --git a/vipra-cmd/src/main/resources/config.json b/vipra-cmd/src/main/resources/config.json index b0b589f81342502719816096f32cd32508bf5a5a..a265d0e297666cc9da5dd4bcf30eaf62de6efeb1 100644 --- a/vipra-cmd/src/main/resources/config.json +++ b/vipra-cmd/src/main/resources/config.json @@ -17,7 +17,7 @@ "maxSimilarDocuments": 10, "documentMinimumLength": 1, "documentMinimumWordFrequency": 1, - "spotlightSupport": 200, + "spotlightSupport": 100, "spotlightConfidence": 0.5, "minRelativeProbability": 0.01, "risingDecayLambda": 0.0, @@ -26,6 +26,7 @@ "processorUseText": true, "processorUseEntities": true, "processorUseEntityTypes": true, - "processorUseHypernyms": true + "processorUseHypernyms": true, + "queryEntityDescriptions": true } } \ No newline at end of file diff --git a/vipra-ui/app/html/articles/entities.html b/vipra-ui/app/html/articles/entities.html new file mode 100644 index 0000000000000000000000000000000000000000..493746dfc6bb01612d96b6c7dbc512d5869e2f3d --- /dev/null +++ b/vipra-ui/app/html/articles/entities.html @@ -0,0 +1,64 @@ +<div ng-cloak ui-view></div> +<div class="container" ng-cloak ng-hide="!rootModels.topicModel || state.name !== 'articles.show.entities'"> + <div class="page-header no-border"> + <span class="label label-default">Article</span> + <h1 ng-bind="::article.title"></h1> + </div> + <div> + <ul class="nav nav-tabs" role="tablist"> + <li> + <a ui-sref="articles.show({id:id})"><i class="fa fa-file-text-o"></i></a> + </li> + <li class="active"> + <a>Entities</a> + </li> + <li> + <a ui-sref="articles.show.words({id:id})">Words</a> + </li> + <li> + <a ui-sref="network({type:'articles', id:id})"> + <i class="fa fa-sitemap"></i> Network + </a> + </li> + </ul> + <div class="tab-content"> + <div role="tabpanel" class="tab-pane active tab-entities"> + <br> + <div class="row"> + <div class="col-md-12" highcharts="entityDistribution"></div> + </div> + <div class="row"> + <div class="col-md-12"> + <div class="panel panel-default"> + <div class="panel-heading"> + Found <ng-pluralize count="allEntities.length||0" when="{0:'no entities',1:'1 entity',other:'{} unique entities'}"></ng-pluralize> for this article. + </div> + <table class="table table-bordered table-condensed table-fixed"> + <thead> + <tr> + <th ng-model="articlesShowModels.entitiesSort" sort-by="entity.entity">Entity</th> + <th ng-model="articlesShowModels.entitiesSort" sort-by="count">Count</th> + </tr> + </thead> + <tbody> + <tr ng-repeat="entity in entities | orderBy:articlesShowModels.entitiesSort"> + <td> + <entity-link entity="::entity.entity" /> + </td> + <td ng-bind="::entity.count"></td> + </tr> + </tbody> + </table> + <div class="panel-footer"> + <ng-pluralize count="entities.length" when="{0:'No entities',1:'First entity',other:'First {} entities'}"></ng-pluralize>. + <button class="btn btn-default btn-sm" ng-click="showMoreEntities()" ng-show="entities.length<allEntities.length" analytics-on analytics-event="Article Entities (more)" analytics-category="Article actions" ng-cloak>Show more</button> + <button class="btn btn-default btn-sm" ng-click="showAllEntities()" ng-show="entities.length<allEntities.length" analytics-on analytics-event="Article Entities (all)" analytics-category="Article actions" ng-cloak>Show all</button> + </div> + </div> + </div> + </div> + </div> + </div> + <div class="loading" ng-hide="article">Loading...</div> + </div> +</div> \ No newline at end of file diff --git a/vipra-ui/app/html/articles/show.html b/vipra-ui/app/html/articles/show.html index c6173ced1d7a2028c984bc2d8895d3164349abc4..1bce4b665b89a6a7389d11c54401bb5f49abf632 100644 --- a/vipra-ui/app/html/articles/show.html +++ b/vipra-ui/app/html/articles/show.html @@ -7,13 +7,13 @@ <div> <ul class="nav nav-tabs" role="tablist"> <li class="active"> - <a data-target=".tab-info" data-toggle="tab" bs-tab><i class="fa fa-file-text-o"></i></a> + <a><i class="fa fa-file-text-o"></i></a> </li> <li> - <a data-target=".tab-entities" data-toggle="tab" bs-tab shown="openTabEntities()">Entities</a> + <a ui-sref="articles.show.entities({id:article.id})">Entities</a> </li> <li> - <a data-target=".tab-words" data-toggle="tab" bs-tab shown="openTabWords()">Words</a> + <a ui-sref="articles.show.words({id:article.id})">Words</a> </li> <li> <a ui-sref="network({type:'articles', id:article.id})"> @@ -101,83 +101,6 @@ <hr> <div class="text-justify" ng-bind-html="::article.text"></div> </div> - <div role="tabpanel" class="tab-pane tab-entities"> - <br> - <div class="row"> - <div class="col-md-12" highcharts="entityDistribution"></div> - </div> - <div class="row"> - <div class="col-md-12"> - <div class="panel panel-default"> - <div class="panel-heading"> - Found <ng-pluralize count="allEntities.length||0" when="{0:'no entities',1:'1 entity',other:'{} unique entities'}"></ng-pluralize> for this article. - </div> - <table class="table table-bordered table-condensed table-fixed"> - <thead> - <tr> - <th ng-model="articlesShowModels.entitiesSort" sort-by="entity.id">Entity</th> - <th ng-model="articlesShowModels.entitiesSort" sort-by="count">Count</th> - </tr> - </thead> - <tbody> - <tr ng-repeat="entity in entities | orderBy:articlesShowModels.entitiesSort"> - <td> - <entity-link entity="::entity.entity" /> - </td> - <td ng-bind="::entity.count"></td> - </tr> - </tbody> - </table> - <div class="panel-footer"> - <ng-pluralize count="entities.length" when="{0:'No entities',1:'First entity',other:'First {} entities'}"></ng-pluralize>. - <button class="btn btn-default btn-sm" ng-click="showMoreEntities()" ng-show="entities.length<allEntities.length" analytics-on analytics-event="Article Entities (more)" analytics-category="Article actions" ng-cloak>Show more</button> - <button class="btn btn-default btn-sm" ng-click="showAllEntities()" ng-show="entities.length<allEntities.length" analytics-on analytics-event="Article Entities (all)" analytics-category="Article actions" ng-cloak>Show all</button> - </div> - </div> - </div> - </div> - </div> - <div role="tabpanel" class="tab-pane tab-words"> - <br> - <div class="row"> - <div class="col-md-12" highcharts="wordDistribution"></div> - </div> - <div class="row"> - <div class="col-md-12"> - <div class="panel panel-default"> - <div class="panel-heading"> - Found <ng-pluralize count="allWords.length||0" when="{0:'no words',1:'1 word',other:'{} unique words'}"></ng-pluralize> for this article.<br> - Article has <ng-pluralize count="article.stats.wordCount||0" when="{0:'no words',1:'1 word',other:'{} words'}"></ng-pluralize>, <span ng-bind-template="{{::article.stats.processedWordCount}} after cleaning ({{::Vipra.toPercent(article.stats.reductionRatio)}}% reduction)" ng-show="article.stats.wordCount>0" ng-cloak></span>. - </div> - <table class="table table-bordered table-condensed table-fixed"> - <thead> - <tr> - <th ng-model="articlesShowModels.wordsSort" sort-by="id">Word</th> - <th ng-model="articlesShowModels.wordsSort" sort-by="count">Count</th> - <th>Share</th> - <th>Reduced share <info text="Word share applied on the processed text"/></th> - </tr> - </thead> - <tbody> - <tr ng-repeat="word in words | orderBy:articlesShowModels.wordsSort"> - <td> - <word-link word="::word" /> - </td> - <td ng-bind="::word.count"></td> - <td ng-bind-template="{{::Vipra.toPercent(word.count/article.stats.wordCount, 2)}}%"></td> - <td ng-bind-template="{{::Vipra.toPercent(word.count/article.stats.processedWordCount, 2)}}%"></td> - </tr> - </tbody> - </table> - <div class="panel-footer"> - <ng-pluralize count="words.length" when="{0:'No words',1:'First word',other:'First {} words'}"></ng-pluralize>. - <button class="btn btn-default btn-sm" ng-click="showMoreWords()" ng-show="words.length<allWords.length" analytics-on analytics-event="Article Words (More)" analytics-category="Article actions" ng-cloak>Show more</button> - <button class="btn btn-default btn-sm" ng-click="showAllWords()" ng-show="words.length<allWords.length" analytics-on analytics-event="Article Words (All)" analytics-category="Article actions" ng-cloak>Show all</button> - </div> - </div> - </div> - </div> - </div> </div> <div class="loading" ng-hide="article">Loading...</div> </div> diff --git a/vipra-ui/app/html/articles/words.html b/vipra-ui/app/html/articles/words.html new file mode 100644 index 0000000000000000000000000000000000000000..7c9a87ba6b44d2822828dd5839efa5fb72c59fd9 --- /dev/null +++ b/vipra-ui/app/html/articles/words.html @@ -0,0 +1,69 @@ +<div ng-cloak ui-view></div> +<div class="container" ng-cloak ng-hide="!rootModels.topicModel || state.name !== 'articles.show.words'"> + <div class="page-header no-border"> + <span class="label label-default">Article</span> + <h1 ng-bind="::article.title"></h1> + </div> + <div> + <ul class="nav nav-tabs" role="tablist"> + <li> + <a ui-sref="articles.show({id:id})"><i class="fa fa-file-text-o"></i></a> + </li> + <li> + <a ui-sref="articles.show.entities({id:id})">Entities</a> + </li> + <li class="active"> + <a>Words</a> + </li> + <li> + <a ui-sref="network({type:'articles', id:id})"> + <i class="fa fa-sitemap"></i> Network + </a> + </li> + </ul> + <div class="tab-content"> + <div role="tabpanel" class="tab-pane active tab-words"> + <br> + <div class="row"> + <div class="col-md-12" highcharts="wordDistribution"></div> + </div> + <div class="row"> + <div class="col-md-12"> + <div class="panel panel-default"> + <div class="panel-heading"> + Found <ng-pluralize count="allWords.length||0" when="{0:'no words',1:'1 word',other:'{} unique words'}"></ng-pluralize> for this article.<br> + Article has <ng-pluralize count="article.stats.wordCount||0" when="{0:'no words',1:'1 word',other:'{} words'}"></ng-pluralize>, <span ng-bind-template="{{::article.stats.processedWordCount}} after cleaning ({{::Vipra.toPercent(article.stats.reductionRatio)}}% reduction)" ng-show="article.stats.wordCount>0" ng-cloak></span>. + </div> + <table class="table table-bordered table-condensed table-fixed"> + <thead> + <tr> + <th ng-model="articlesShowModels.wordsSort" sort-by="id">Word</th> + <th ng-model="articlesShowModels.wordsSort" sort-by="count">Count</th> + <th>Share</th> + <th>Reduced share <info text="Word share applied on the processed text"/></th> + </tr> + </thead> + <tbody> + <tr ng-repeat="word in words | orderBy:articlesShowModels.wordsSort"> + <td> + <word-link word="::word" /> + </td> + <td ng-bind="::word.count"></td> + <td ng-bind-template="{{::Vipra.toPercent(word.count/article.stats.wordCount, 2)}}%"></td> + <td ng-bind-template="{{::Vipra.toPercent(word.count/article.stats.processedWordCount, 2)}}%"></td> + </tr> + </tbody> + </table> + <div class="panel-footer"> + <ng-pluralize count="words.length" when="{0:'No words',1:'First word',other:'First {} words'}"></ng-pluralize>. + <button class="btn btn-default btn-sm" ng-click="showMoreWords()" ng-show="words.length<allWords.length" analytics-on analytics-event="Article Words (More)" analytics-category="Article actions" ng-cloak>Show more</button> + <button class="btn btn-default btn-sm" ng-click="showAllWords()" ng-show="words.length<allWords.length" analytics-on analytics-event="Article Words (All)" analytics-category="Article actions" ng-cloak>Show all</button> + </div> + </div> + </div> + </div> + </div> + </div> + <div class="loading" ng-hide="article">Loading...</div> + </div> +</div> \ No newline at end of file diff --git a/vipra-ui/app/html/directives/entity-menu.html b/vipra-ui/app/html/directives/entity-menu.html index 2c397f87213e8af260677eb7ed3f1c1491f569ee..f386f64aabad450eba56e5e815b27a25752a938c 100644 --- a/vipra-ui/app/html/directives/entity-menu.html +++ b/vipra-ui/app/html/directives/entity-menu.html @@ -3,9 +3,9 @@ <i class="fa fa-caret-down"></i> </a> <ul class="dropdown-menu" ng-class="{'dropdown-menu-right':dropdownRight}"> - <li><a ui-sref="entities.show({id:entity.id})">Show</a></li> + <li><a ui-sref="entities.show({id:entity.entity})">Show</a></li> <li role="separator" class="divider"></li> - <li><a ui-sref="entities.show.articles({id:entity.id})">Articles</a></li> + <li><a ui-sref="entities.show.articles({id:entity.entity})">Articles</a></li> <li role="separator" class="divider"></li> <li><a ng-href="{{entity.url}}" target="_blank"><span class="dbpedia-logo"></span> DBPedia</a></li> </ul> diff --git a/vipra-ui/app/html/directives/word-link.html b/vipra-ui/app/html/directives/word-link.html index 981f9e7306030c6963defb46c39a149532b7607c..ef93cf21f8d0ab6ff5355603a29f648b711bfc4e 100644 --- a/vipra-ui/app/html/directives/word-link.html +++ b/vipra-ui/app/html/directives/word-link.html @@ -1,6 +1,6 @@ <div class="link-wrapper"> - <span class="menu-padding ellipsis"> - <a class="title" ui-sref="words.show({id: word.word})" ng-bind="word.word" ng-attr-title="{{::word.word}}"></a> + <span class="ellipsis" ng-class="{'menu-padding':showMenu}"> + <a class="title" ui-sref="words.show({id: id})" ng-bind="id" ng-attr-title="{{::id}}"></a> </span> <word-menu class="menu-button" word="word" ng-if="::showMenu" /> </div> \ No newline at end of file diff --git a/vipra-ui/app/html/directives/word-menu.html b/vipra-ui/app/html/directives/word-menu.html index c648b033ad287fd96ab82ce8c5d2cf1e275222e9..97c31b2712070037b30c2b0e95ffbf6d4174c507 100644 --- a/vipra-ui/app/html/directives/word-menu.html +++ b/vipra-ui/app/html/directives/word-menu.html @@ -3,9 +3,9 @@ <i class="fa fa-caret-down"></i> </a> <ul class="dropdown-menu" ng-class="{'dropdown-menu-right':dropdownRight}"> - <li><a ui-sref="words.show({id:word.id})">Show</a></li> + <li><a ui-sref="words.show({id:id})">Show</a></li> <li role="separator" class="divider"></li> - <li><a ui-sref="words.show.topics({id:word.id})">Topics</a></li> - <li><a ui-sref="words.show.articles({id:word.id})">Articles</a></li> + <li><a ui-sref="words.show.topics({id:id})">Topics</a></li> + <li><a ui-sref="words.show.articles({id:id})">Articles</a></li> </ul> </div> \ No newline at end of file diff --git a/vipra-ui/app/html/entities/show.html b/vipra-ui/app/html/entities/show.html index a6b2fe07d4af346f3ddd8f68cc15d38d5efcc228..2dfffa79322afe2efe4be09c3a5d7e83b7ecd067 100644 --- a/vipra-ui/app/html/entities/show.html +++ b/vipra-ui/app/html/entities/show.html @@ -2,6 +2,7 @@ <div class="container" ng-cloak ng-hide="!rootModels.topicModel || state.name !== 'entities.show'"> <div class="page-header no-border"> <span class="label label-default">Entity</span> + <a class="label label-default" ng-if="entity.isWord" ui-sref="words.show({id:entity.entity})">Word</a> <h1 ng-bind="::entity.entity"></h1> </div> <div> @@ -30,12 +31,19 @@ </td> </tr> <tr> - <th class="infocol">Hypernym</th> + <th class="infocol">Hypernym <info text="Hypernyms are superordinate words, they describe collections of semantically similar words. For example, 'Color' is the hypernym for 'blue' and 'red'."/></th> <td ng-bind-template="{{ entity.isHypernym ? 'Yes' : 'No' }}"></td> </tr> + <tr> + <th class="infocol">Word</th> + <td> + <span ng-bind-template="{{ entity.isWord ? 'Yes' : 'No' }}"></span> + <word-link word="::entity.entity" ng-if="entity.isWord" /> + </td> + </tr> <tr> <th class="infocol">Types</th> - <td ng-bind-template="{{entity.types.join(', ') || 'No types'}}" ng-class="{'text-muted':!entity.types}"></td> + <td ng-bind-html="::types" ng-class="{'text-muted':!entity.types}"></td> </tr> <tr> <th class="infocol">Description</th> diff --git a/vipra-ui/app/html/topics/articles.html b/vipra-ui/app/html/topics/articles.html index 24c8c059471d1c39822e5dbf7f0c0aaf7af3fc3c..d0232f29adf269e8f1df97f5dc64e687271f4235 100644 --- a/vipra-ui/app/html/topics/articles.html +++ b/vipra-ui/app/html/topics/articles.html @@ -7,16 +7,16 @@ <div> <ul class="nav nav-tabs" role="tablist"> <li> - <a ui-sref="topics.show({id:topic.id})"><i class="fa fa-file-text-o"></i></a> + <a ui-sref="topics.show({id:id})"><i class="fa fa-file-text-o"></i></a> </li> <li> - <a ui-sref="topics.show.sequences({id:topic.id})">Sequences</a> + <a ui-sref="topics.show.sequences({id:id})">Sequences</a> </li> <li class="active"> <a>Articles</a> </li> <li> - <a ui-sref="network({type:'topics', id:topic.id})"> + <a ui-sref="network({type:'topics', id:id})"> <i class="fa fa-sitemap"></i> Network </a> </li> diff --git a/vipra-ui/app/html/topics/sequences.html b/vipra-ui/app/html/topics/sequences.html index ab4a698be52d06b840634d5d237a343f0cc7afb2..78ec3f0f2f542947979c8c958d2cea147934e987 100644 --- a/vipra-ui/app/html/topics/sequences.html +++ b/vipra-ui/app/html/topics/sequences.html @@ -7,16 +7,16 @@ <div> <ul class="nav nav-tabs" role="tablist"> <li> - <a ui-sref="topics.show({id:topic.id})"><i class="fa fa-file-text-o"></i></a> + <a ui-sref="topics.show({id:id})"><i class="fa fa-file-text-o"></i></a> </li> <li class="active"> <a>Sequences</a> </li> <li> - <a ui-sref="topics.show.articles({id:topic.id})">Articles</a> + <a ui-sref="topics.show.articles({id:id})">Articles</a> </li> <li> - <a ui-sref="network({type:'topics', id:topic.id})"> + <a ui-sref="network({type:'topics', id:id})"> <i class="fa fa-sitemap"></i> Network </a> </li> diff --git a/vipra-ui/app/html/topics/show.html b/vipra-ui/app/html/topics/show.html index 55f475a72ae789016a8fe6f48466708a81955815..515c53108ac04b80b2e5b0c43e8990288447645d 100644 --- a/vipra-ui/app/html/topics/show.html +++ b/vipra-ui/app/html/topics/show.html @@ -23,7 +23,7 @@ <a class="dropdown-toggle" data-toggle="dropdown" href="#" role="button" aria-haspopup="true" aria-expanded="false"> Actions <span class="caret"></span> </a> - <ul class="dropdown-menu"> + <ul class="dropdown-menu pull-left"> <li><a ng-click="startRename()" analytics-on analytics-event="Topic Rename (Start)" analytics-category="Topic actions">Rename</a></li> </ul> </li> diff --git a/vipra-ui/app/img/ui-article-entities.png b/vipra-ui/app/img/ui-article-entities.png new file mode 100644 index 0000000000000000000000000000000000000000..78b6f6835f6996a302ee42acf5183bc44e629a5c Binary files /dev/null and b/vipra-ui/app/img/ui-article-entities.png differ diff --git a/vipra-ui/app/img/ui-article-hover.png b/vipra-ui/app/img/ui-article-hover.png deleted file mode 100644 index d559761daa72e5688db79bd6cb78afb9110ff428..0000000000000000000000000000000000000000 Binary files a/vipra-ui/app/img/ui-article-hover.png and /dev/null differ diff --git a/vipra-ui/app/img/ui-article-words.png b/vipra-ui/app/img/ui-article-words.png new file mode 100644 index 0000000000000000000000000000000000000000..b36b7ae1db342ae10c2385eb89a2b11202cff41e Binary files /dev/null and b/vipra-ui/app/img/ui-article-words.png differ diff --git a/vipra-ui/app/img/ui-article.png b/vipra-ui/app/img/ui-article.png index 1729d6217c0090bdaddb160029fc987a182e302c..11f3dc82aa1359a9a3ea4cd240ff4058423e0641 100644 Binary files a/vipra-ui/app/img/ui-article.png and b/vipra-ui/app/img/ui-article.png differ diff --git a/vipra-ui/app/img/ui-articles-hover.png b/vipra-ui/app/img/ui-articles-hover.png deleted file mode 100644 index 22faabcccb8c0ddfc38be806a7ee3f038715b7c3..0000000000000000000000000000000000000000 Binary files a/vipra-ui/app/img/ui-articles-hover.png and /dev/null differ diff --git a/vipra-ui/app/img/ui-articles.png b/vipra-ui/app/img/ui-articles.png index ff027a78596e0493da0fba829ab1428715cd93f3..1563d11fabe96066f68fffbd4617c46e25874672 100644 Binary files a/vipra-ui/app/img/ui-articles.png and b/vipra-ui/app/img/ui-articles.png differ diff --git a/vipra-ui/app/img/ui-entities-hover.png b/vipra-ui/app/img/ui-entities-hover.png deleted file mode 100644 index 56092fdca9f61d34ea82efa22441d9c16e0b7c28..0000000000000000000000000000000000000000 Binary files a/vipra-ui/app/img/ui-entities-hover.png and /dev/null differ diff --git a/vipra-ui/app/img/ui-entities.png b/vipra-ui/app/img/ui-entities.png index 3e7d181ee12992517a52559eec9be03923e18666..93720c0ba85f69e3a95b301798a5763312047709 100644 Binary files a/vipra-ui/app/img/ui-entities.png and b/vipra-ui/app/img/ui-entities.png differ diff --git a/vipra-ui/app/img/ui-entity-articles.png b/vipra-ui/app/img/ui-entity-articles.png new file mode 100644 index 0000000000000000000000000000000000000000..ad21645fa1f307b83e3adb7f4cd27d53ccfe44e4 Binary files /dev/null and b/vipra-ui/app/img/ui-entity-articles.png differ diff --git a/vipra-ui/app/img/ui-entity-hover.png b/vipra-ui/app/img/ui-entity-hover.png deleted file mode 100644 index 4331895b334bec6f751b1721294d771941144806..0000000000000000000000000000000000000000 Binary files a/vipra-ui/app/img/ui-entity-hover.png and /dev/null differ diff --git a/vipra-ui/app/img/ui-entity.png b/vipra-ui/app/img/ui-entity.png index 907e2df263c95ac17ea02e3594d211884b9d3c7e..fb8fa38c8058b0aec699aba11cfd1b8f58c152ad 100644 Binary files a/vipra-ui/app/img/ui-entity.png and b/vipra-ui/app/img/ui-entity.png differ diff --git a/vipra-ui/app/img/ui-explorer-hover.png b/vipra-ui/app/img/ui-explorer-hover.png deleted file mode 100644 index 6c91409a7d1141387f49ec2610bcafa9a7b81e71..0000000000000000000000000000000000000000 Binary files a/vipra-ui/app/img/ui-explorer-hover.png and /dev/null differ diff --git a/vipra-ui/app/img/ui-explorer.png b/vipra-ui/app/img/ui-explorer.png index ceb5ee4c67e189d471ce4c131737a1f8d2dac6a8..dcdcf4f2260787054cb974b733a8ec72f775bdf1 100644 Binary files a/vipra-ui/app/img/ui-explorer.png and b/vipra-ui/app/img/ui-explorer.png differ diff --git a/vipra-ui/app/img/ui-keyboard.png b/vipra-ui/app/img/ui-keyboard.png new file mode 100644 index 0000000000000000000000000000000000000000..602a7144b3a1ebfbe11fe7bfde2fce8617356fd1 Binary files /dev/null and b/vipra-ui/app/img/ui-keyboard.png differ diff --git a/vipra-ui/app/img/ui-network-hover.png b/vipra-ui/app/img/ui-network-hover.png deleted file mode 100644 index c7216ae5d21c01d4fed64d2c6c00e7dd55583a46..0000000000000000000000000000000000000000 Binary files a/vipra-ui/app/img/ui-network-hover.png and /dev/null differ diff --git a/vipra-ui/app/img/ui-network.png b/vipra-ui/app/img/ui-network.png index 17061c8dcf9c1a4ab804c1477cf89256799a3767..70ff07499c94f99b98761f615758f1ec5ec8920a 100644 Binary files a/vipra-ui/app/img/ui-network.png and b/vipra-ui/app/img/ui-network.png differ diff --git a/vipra-ui/app/img/ui-start-hover.png b/vipra-ui/app/img/ui-start-hover.png deleted file mode 100644 index 3c302933eda5ca90b9e3ef8d1e207c81acd7baec..0000000000000000000000000000000000000000 Binary files a/vipra-ui/app/img/ui-start-hover.png and /dev/null differ diff --git a/vipra-ui/app/img/ui-start.png b/vipra-ui/app/img/ui-start.png index 5b243bb69e8aba448f8220c9e699a81e3d80216e..9c99fed60a5df258e9fd3815fb2f1afc1c13d77a 100644 Binary files a/vipra-ui/app/img/ui-start.png and b/vipra-ui/app/img/ui-start.png differ diff --git a/vipra-ui/app/img/ui-topic-articles.png b/vipra-ui/app/img/ui-topic-articles.png new file mode 100644 index 0000000000000000000000000000000000000000..ea110d913c597a4a5775d8ac2df786480a299a7c Binary files /dev/null and b/vipra-ui/app/img/ui-topic-articles.png differ diff --git a/vipra-ui/app/img/ui-topic-hover.png b/vipra-ui/app/img/ui-topic-hover.png deleted file mode 100644 index 4e58ab38fd2c78f5e1a7fe1d684f83583ede755a..0000000000000000000000000000000000000000 Binary files a/vipra-ui/app/img/ui-topic-hover.png and /dev/null differ diff --git a/vipra-ui/app/img/ui-topic-sequences.png b/vipra-ui/app/img/ui-topic-sequences.png new file mode 100644 index 0000000000000000000000000000000000000000..328f2b33802b1fcf63ff4660656b8b201edcd47d Binary files /dev/null and b/vipra-ui/app/img/ui-topic-sequences.png differ diff --git a/vipra-ui/app/img/ui-topic.png b/vipra-ui/app/img/ui-topic.png index c64f637ff388de08bd5ef618327dc2de8b76a63d..1fdd2296ea27d6e986003d3ca46e4a229c3c6338 100644 Binary files a/vipra-ui/app/img/ui-topic.png and b/vipra-ui/app/img/ui-topic.png differ diff --git a/vipra-ui/app/img/ui-topicmodels.png b/vipra-ui/app/img/ui-topicmodels.png new file mode 100644 index 0000000000000000000000000000000000000000..8ce48621f9f1c0530974bba6bc6aac1c2da0471c Binary files /dev/null and b/vipra-ui/app/img/ui-topicmodels.png differ diff --git a/vipra-ui/app/img/ui-topics-hover.png b/vipra-ui/app/img/ui-topics-hover.png deleted file mode 100644 index 4018877801bd8b2b4d7f7f7bae32feaa0f240027..0000000000000000000000000000000000000000 Binary files a/vipra-ui/app/img/ui-topics-hover.png and /dev/null differ diff --git a/vipra-ui/app/img/ui-topics.png b/vipra-ui/app/img/ui-topics.png index 1bb2ac12dbefbba872a04e65100d3b15280ddbbf..f7333816ea33b525b9cf9710c6cead195e648e48 100644 Binary files a/vipra-ui/app/img/ui-topics.png and b/vipra-ui/app/img/ui-topics.png differ diff --git a/vipra-ui/app/img/ui-word-articles.png b/vipra-ui/app/img/ui-word-articles.png new file mode 100644 index 0000000000000000000000000000000000000000..316fb74b4bc8e6d68ed2d9041007baa83deb9591 Binary files /dev/null and b/vipra-ui/app/img/ui-word-articles.png differ diff --git a/vipra-ui/app/img/ui-word-hover.png b/vipra-ui/app/img/ui-word-hover.png deleted file mode 100644 index 840cd7ee65ca68a9c960dfa23abb19d1c5e61ce1..0000000000000000000000000000000000000000 Binary files a/vipra-ui/app/img/ui-word-hover.png and /dev/null differ diff --git a/vipra-ui/app/img/ui-word-topics.png b/vipra-ui/app/img/ui-word-topics.png new file mode 100644 index 0000000000000000000000000000000000000000..25400bcb855162dca25b0f7236e46227b8098b44 Binary files /dev/null and b/vipra-ui/app/img/ui-word-topics.png differ diff --git a/vipra-ui/app/img/ui-word.png b/vipra-ui/app/img/ui-word.png index 286aff0490cb9eac6e7890a9e7cfc82db61b230f..12dd883c79093e6338963e553bb90e71720b3b2b 100644 Binary files a/vipra-ui/app/img/ui-word.png and b/vipra-ui/app/img/ui-word.png differ diff --git a/vipra-ui/app/img/ui-words-hover.png b/vipra-ui/app/img/ui-words-hover.png deleted file mode 100644 index 1696d928d3e988a711a19381fe7764bee92b0ac5..0000000000000000000000000000000000000000 Binary files a/vipra-ui/app/img/ui-words-hover.png and /dev/null differ diff --git a/vipra-ui/app/img/ui-words.png b/vipra-ui/app/img/ui-words.png index 76e3ecd5b974de8488ba23ada3134379bc9d22a3..1c5ac444fc13187bee874a3e541aeb076b93ab23 100644 Binary files a/vipra-ui/app/img/ui-words.png and b/vipra-ui/app/img/ui-words.png differ diff --git a/vipra-ui/app/js/app.js b/vipra-ui/app/js/app.js index b94779fb0cd3a3e4fce246f7d920378d09c2294a..1720e2110d5321a1ec4cf067d57530cf11a01e85 100644 --- a/vipra-ui/app/js/app.js +++ b/vipra-ui/app/js/app.js @@ -70,6 +70,18 @@ controller: 'ArticlesShowController' }); + $stateProvider.state('articles.show.entities', { + url: '/entities', + templateUrl: 'html/articles/entities.html', + controller: 'ArticlesEntitiesController' + }); + + $stateProvider.state('articles.show.words', { + url: '/words', + templateUrl: 'html/articles/words.html', + controller: 'ArticlesWordsController' + }); + // states: topics $stateProvider.state('topics', { @@ -277,14 +289,22 @@ $('body').append(dropdownMenu.detach()); // grab the new offset position - var eOffset = $(e.target).offset(); + var eTarget = $(e.target); + var eOffset = eTarget.offset(); + var css = { + 'display': 'block', + 'top': eOffset.top + eTarget.outerHeight(), + 'left': eOffset.left, + 'right': 'auto' + }; + + if(dropdownMenu.hasClass('pull-left')) { + css.left = 'auto'; + css.right = $(window).width() - (eOffset.left + eTarget.outerWidth()); + } // make sure to place it where it would normally go (this could be improved) - dropdownMenu.css({ - 'display': 'block', - 'top': eOffset.top + $(e.target).outerHeight(), - 'left': eOffset.left - }); + dropdownMenu.css(css); }); // and when you hide it, reattach the drop down, and hide it normally diff --git a/vipra-ui/app/js/controllers.js b/vipra-ui/app/js/controllers.js index c0f72f89e1817b3b13e070d4934b9c89a1a78320..1a62abdb7a5e881500039743ce703b41d9d7d3b1 100644 --- a/vipra-ui/app/js/controllers.js +++ b/vipra-ui/app/js/controllers.js @@ -204,7 +204,7 @@ ]); /** - * Index controller + * Index route */ app.controller('IndexController', ['$scope', '$stateParams', '$location', '$timeout', 'ArticleFactory', 'TopicFactory', 'SearchFactory', function($scope, $stateParams, $location, $timeout, ArticleFactory, TopicFactory, SearchFactory) { @@ -276,7 +276,7 @@ ]); /** - * About controller + * About route */ app.controller('AboutController', ['$scope', 'InfoFactory', function($scope, InfoFactory) { @@ -292,7 +292,7 @@ ]); /** - * Network controller + * Network route */ app.controller('NetworkController', ['$scope', '$state', '$stateParams', '$q', '$window', '$timeout', 'ArticleFactory', 'TopicFactory', 'WordFactory', function($scope, $state, $stateParams, $q, $window, $timeout, ArticleFactory, TopicFactory, WordFactory) { @@ -849,6 +849,9 @@ } ]); + /** + * Explorer route + */ app.controller('ExplorerController', ['$scope', '$state', '$templateCache', '$timeout', 'TopicFactory', function($scope, $state, $templateCache, $timeout, TopicFactory) { $scope.rootModels.title = 'Explorer'; @@ -1135,7 +1138,7 @@ ****************************************************************************/ /** - * Article Index route + * Articles Index route */ app.controller('ArticlesIndexController', ['$scope', '$state', 'ArticleFactory', function($scope, $state, ArticleFactory) { @@ -1186,7 +1189,7 @@ ]); /** - * Article Show route + * Articles Show route */ app.controller('ArticlesShowController', ['$scope', '$state', '$stateParams', '$timeout', 'ArticleFactory', function($scope, $state, $stateParams, $timeout, ArticleFactory) { @@ -1250,42 +1253,43 @@ $scope.error(e.status); }); - $scope.openTabWords = function() { - if ($scope.words) return; - - ArticleFactory.get({ - id: $stateParams.id, - fields: 'words' - }, function(data) { - $scope.allWords = data.words; - $scope.showMoreWords(); - }); + $scope.prepareText = function(text) { + var entityBase = $state.href('entities', {}, {absolute: true}), + wordBase = $state.href('words', {}, {absolute: true}); + return text.replace(/data-entity="([^"]*)"/g, 'href="' + entityBase + '/$1"').replace(/data-word="([^"]*)"/g, 'href="' + wordBase + '/$1"'); }; - var wordsCount = 0; - $scope.showMoreWords = function() { - wordsCount += 20; - $scope.words = $scope.allWords.slice(0, wordsCount); - $scope.refreshWordDistribution(); - }; + var topicShareChartElement = $('#topic-share'); + $scope.highlightSlice = function(id, toggle) { + var highcharts = topicShareChartElement.highcharts(); + if (!highcharts) return; + var point = highcharts.get(id); + if (!point) return; - $scope.showAllWords = function() { - wordsCount = $scope.allWords.length; - $scope.words = $scope.allWords; - $scope.refreshWordDistribution(); + if (toggle) { + point.onMouseOver(); + } else { + point.onMouseOut(); + highcharts.tooltip.hide(); + } }; + } + ]); - $scope.openTabEntities = function() { - if ($scope.entities) return; + /** + * Articles Entities route + */ + app.controller('ArticlesEntitiesController', ['$scope', '$state', '$stateParams', 'ArticleFactory', + function($scope, $state, $stateParams, ArticleFactory) { + $scope.id = $stateParams.id; - ArticleFactory.get({ - id: $stateParams.id, - fields: 'entities' - }, function(data) { - $scope.allEntities = data.entities; - $scope.showMoreEntities(); - }); - }; + ArticleFactory.get({ + id: $stateParams.id, + fields: 'entities' + }, function(data) { + $scope.allEntities = data.entities; + $scope.showMoreEntities(); + }); var entitiesCount = 0; $scope.showMoreEntities = function() { @@ -1300,26 +1304,6 @@ $scope.refreshEntityDistribution(); }; - $scope.prepareText = function(text) { - var base = $state.href('entities', {}, {absolute: true}); - return text.replace(/href="[^"]*" data-entity="([^"]*)"/g, 'href="' + base + '/$1"'); - }; - - var topicShareChartElement = $('#topic-share'); - $scope.highlightSlice = function(id, toggle) { - var highcharts = topicShareChartElement.highcharts(); - if (!highcharts) return; - var point = highcharts.get(id); - if (!point) return; - - if (toggle) { - point.onMouseOver(); - } else { - point.onMouseOut(); - highcharts.tooltip.hide(); - } - }; - $scope.refreshEntityDistribution = function() { var series = []; if($scope.entities && $scope.entities.length) { @@ -1332,6 +1316,36 @@ } $scope.entityDistribution = itemCountChart(series, ['Entities']); }; + } + ]); + + /** + * Articles Words route + */ + app.controller('ArticlesWordsController', ['$scope', '$state', '$stateParams', 'ArticleFactory', + function($scope, $state, $stateParams, ArticleFactory) { + $scope.id = $stateParams.id; + + ArticleFactory.get({ + id: $stateParams.id, + fields: 'words' + }, function(data) { + $scope.allWords = data.words; + $scope.showMoreWords(); + }); + + var wordsCount = 0; + $scope.showMoreWords = function() { + wordsCount += 20; + $scope.words = $scope.allWords.slice(0, wordsCount); + $scope.refreshWordDistribution(); + }; + + $scope.showAllWords = function() { + wordsCount = $scope.allWords.length; + $scope.words = $scope.allWords; + $scope.refreshWordDistribution(); + }; $scope.refreshWordDistribution = function() { var series = []; @@ -1353,7 +1367,7 @@ ****************************************************************************/ /** - * Topic Index route + * Topics Index route */ app.controller('TopicsIndexController', ['$scope', '$state', 'TopicFactory', function($scope, $state, TopicFactory) { @@ -1401,10 +1415,10 @@ ]); /** - * Topic Show route + * Topics Show route */ - app.controller('TopicsShowController', ['$scope', '$state', '$stateParams', '$timeout', 'TopicFactory', 'SequenceFactory', - function($scope, $state, $stateParams, $timeout, TopicFactory, SequenceFactory) { + app.controller('TopicsShowController', ['$scope', '$state', '$stateParams', '$timeout', 'TopicFactory', + function($scope, $state, $stateParams, $timeout, TopicFactory) { $scope.rootModels.title = 'Topic'; $scope.topicsShowModels = { @@ -1517,10 +1531,11 @@ ]); /** - * Topic Show Sequences route + * Topics Show Sequences route */ app.controller('TopicsSequencesController', ['$scope', '$state', '$stateParams', 'TopicFactory', 'SequenceFactory', function($scope, $state, $stateParams, TopicFactory, SequenceFactory) { + $scope.id = $stateParams.id; $scope.recalcSeqChange = function() { if (!$scope.sequence || !$scope.sequenceCompare) return; @@ -1579,10 +1594,11 @@ ]); /** - * Topic Show Articles route + * Topics Show Articles route */ app.controller('TopicsArticlesController', ['$scope', '$stateParams', 'TopicFactory', function($scope, $stateParams, TopicFactory) { + $scope.id = $stateParams.id; $scope.topicsArticlesModels = { sortkey: 'title', @@ -1613,6 +1629,9 @@ * Entity Controllers ****************************************************************************/ + /** + * Entities Index route + */ app.controller('EntitiesIndexController', ['$scope', '$state', 'EntityFactory', function($scope, $state, EntityFactory) { $scope.rootModels.title = 'Entities'; @@ -1658,6 +1677,9 @@ } ]); + /** + * Entities Show route + */ app.controller('EntitiesShowController', ['$scope', '$state', '$stateParams', 'EntityFactory', function($scope, $state, $stateParams, EntityFactory) { $scope.rootModels.title = 'Entity'; @@ -1671,6 +1693,7 @@ $scope.entityCreated = Vipra.formatDateTime($scope.entity.created); $scope.entityModified = Vipra.formatDateTime($scope.entity.modified); $scope.rootModels.title = $scope.entity.entity; + $scope.types = Vipra.joinResources(data.types); }, function(e) { $scope.error(e.status); }); @@ -1678,6 +1701,9 @@ } ]); + /** + * Entities Articles route + */ app.controller('EntitiesArticlesController', ['$scope', '$state', '$stateParams', 'ArticleFactory', function($scope, $state, $stateParams, ArticleFactory) { @@ -1717,6 +1743,9 @@ * Word Controllers ****************************************************************************/ + /** + * Words Index route + */ app.controller('WordsIndexController', ['$scope', '$state', 'WordFactory', function($scope, $state, WordFactory) { $scope.rootModels.title = 'Words'; @@ -1760,6 +1789,9 @@ } ]); + /** + * Words Show route + */ app.controller('WordsShowController', ['$scope', '$state', '$stateParams', 'WordFactory', function($scope, $state, $stateParams, WordFactory) { $scope.rootModels.title = 'Word'; @@ -1778,6 +1810,9 @@ } ]); + /** + * Words Topics route + */ app.controller('WordsTopicsController', ['$scope', '$state', '$stateParams', 'TopicFactory', function($scope, $state, $stateParams, TopicFactory) { @@ -1809,6 +1844,9 @@ } ]); + /** + * Words Articles route + */ app.controller('WordsArticlesController', ['$scope', '$state', '$stateParams', 'ArticleFactory', function($scope, $state, $stateParams, ArticleFactory) { @@ -1844,7 +1882,11 @@ * Slides Controllers ****************************************************************************/ - app.controller('SlidesController', ['$scope', function($scope) { + /** + * Slides route + */ + app.controller('SlidesController', ['$scope', + function($scope) { $scope.rootModels.title = 'Slides'; $scope.current = 1; @@ -1871,6 +1913,9 @@ * Error Controllers ****************************************************************************/ + /** + * Error route + */ app.controller('ErrorsController', ['$scope', '$state', '$stateParams', function($scope, $state, $stateParams) { $scope.code = $stateParams.code; @@ -1912,6 +1957,7 @@ $scope.changePage = function(page) { $scope.page = parseInt(page, 10); + window.scrollTo(0,0); }; $scope.toPage = function() { @@ -1922,6 +1968,9 @@ } ]); + /** + * Word Evolution + */ app.controller('WordEvolutionController', ['$scope', function($scope) { @@ -1951,8 +2000,8 @@ probs.push([new Date($scope.topic.sequences[j].window.startDate).getTime(), prob]); } evolutions.push({ - id: word.id, - name: word.id, + id: word.word, + name: word.word, color: word.color, data: probs }); diff --git a/vipra-ui/app/js/directives.js b/vipra-ui/app/js/directives.js index 4747f26859f408de855936665cc9b0d2049b4834..2c59a6159418c53bf263934cd98bf88ae50dd850 100644 --- a/vipra-ui/app/js/directives.js +++ b/vipra-ui/app/js/directives.js @@ -42,6 +42,7 @@ link: function($scope) { $scope.showBadge = $scope.badge !== 'false'; $scope.showMenu = $scope.menu !== 'false'; + $scope.id = typeof $scope.word === 'string' ? $scope.word.toLowerCase() : $scope.word.word; } }; }]); @@ -374,6 +375,7 @@ templateUrl: 'html/directives/word-menu.html', link: function($scope) { $scope.dropdownRight = $scope.right === 'true'; + $scope.id = typeof $scope.word === 'string' ? $scope.word.toLowerCase() : $scope.word.word; } }; }]); diff --git a/vipra-ui/app/js/helpers.js b/vipra-ui/app/js/helpers.js index a79de3785b613bb52e461ab97390e69e609b0fa3..6e86b302cfe27ef48e0162230a6845cfc815db97 100644 --- a/vipra-ui/app/js/helpers.js +++ b/vipra-ui/app/js/helpers.js @@ -186,18 +186,15 @@ } }; - Vipra.getWSURL = function() { - if(Vipra.config.websocketUrl) return Vipra.config.websocketUrl; - var protocol = location.protocol === 'https:' ? 'wss:' : 'ws:'; - var path = Vipra.config.restUrl.replace(/rest$/, 'ws'); - if(Vipra.config.restUrl.indexOf('//') !== -1) { - // contains protocol - return path.replace(/^[^/]+\/\//, protocol + '//'); - } else { - // contains no protocol - return protocol + location.hostname + path; + Vipra.joinResources = function(resources) { + if(resources && resources.length) { + var res = []; + for(var i = 0; i < resources.length; i++) + res.push('<a href="http://dbpedia.org/resource/' + resources[i] + '">' + resources[i] + '</a>'); + return res.join(', '); } - }; + return 'No types'; + } /** * Polyfills diff --git a/vipra-util/src/main/java/de/vipra/util/Config.java b/vipra-util/src/main/java/de/vipra/util/Config.java index ee37dab8697d3a6a7e31c56c1a9cc4fd488b6d30..a5aec3b052e7b4dafb5c89d53ad5288927c2f3d6 100644 --- a/vipra-util/src/main/java/de/vipra/util/Config.java +++ b/vipra-util/src/main/java/de/vipra/util/Config.java @@ -132,6 +132,10 @@ public class Config { return modelConfig; } + public void setTopicModelConfig(final TopicModelConfig config) { + topicModelConfigs.put(config.getName(), config); + } + public void deleteTopicModelConfig(final String name) { topicModelConfigs.remove(name); } diff --git a/vipra-util/src/main/java/de/vipra/util/ConsoleUtils.java b/vipra-util/src/main/java/de/vipra/util/ConsoleUtils.java index 34ce26db7ad32b8a06f4441b56b32d9ca922a5f9..94c8931b67dcc6a4e8e8d8f3cb3c3dce03a4f8aa 100644 --- a/vipra-util/src/main/java/de/vipra/util/ConsoleUtils.java +++ b/vipra-util/src/main/java/de/vipra/util/ConsoleUtils.java @@ -18,6 +18,9 @@ public class ConsoleUtils { ERROR }; + public static final String PATH_T = "├"; + public static final String PATH_L = "└"; + private static boolean silent = false; private static int pad = 5; private static int lastLineLength = 0; @@ -266,11 +269,11 @@ public class ConsoleUtils { String positionStr = "(" + StringUtils.pad(Integer.toString(current), Integer.toString(max).length(), " ", true) + "/" + max + ")"; if (max > 1) { if (current < max) - positionStr = " ├ " + positionStr; + positionStr = " " + PATH_T + " " + positionStr; else - positionStr = " └ " + positionStr; + positionStr = " " + PATH_L + " " + positionStr; } else - positionStr = " └ " + positionStr; + positionStr = " " + PATH_L + " " + positionStr; return positionStr; } diff --git a/vipra-util/src/main/java/de/vipra/util/Constants.java b/vipra-util/src/main/java/de/vipra/util/Constants.java index 484fdb4b7d52bbc734ec224ec5fd357fe3d4ef49..e9e4e7b8f542e7c8e19cca413089c965076af256 100644 --- a/vipra-util/src/main/java/de/vipra/util/Constants.java +++ b/vipra-util/src/main/java/de/vipra/util/Constants.java @@ -151,7 +151,7 @@ public class Constants { * Minimum number of dbpedia inlinks for an entity annotation to be * accepted. Default 20. */ - public static final int SPOTLIGHT_SUPPORT = 20; + public static final int SPOTLIGHT_SUPPORT = 100; /** * Disambiguation confidence. Eliminates top n percent of inconfident @@ -176,6 +176,11 @@ public class Constants { public static final boolean PROCESSOR_USE_ENTITY_TYPES = true; public static final boolean PROCESSOR_USE_HYPERNYMS = true; + /** + * True to query entity descriptions from dbpedia upon entity detection. + */ + public static final boolean QUERY_ENTITY_DESCRIPTIONS = true; + /** * Stopwords list. Extensive list of stopwords used to clean imported * articles of the most common words before topic modeling is applied. diff --git a/vipra-util/src/main/java/de/vipra/util/StringUtils.java b/vipra-util/src/main/java/de/vipra/util/StringUtils.java index c903def65e18de28017f25574a023bad83c18745..f215a12a290c73d7ad80ce7bfd9cf46c5fed281c 100644 --- a/vipra-util/src/main/java/de/vipra/util/StringUtils.java +++ b/vipra-util/src/main/java/de/vipra/util/StringUtils.java @@ -244,4 +244,10 @@ public class StringUtils { return pad(str, length, " ", left); } + public static final Pattern WORD_PATTERN = Pattern.compile("[a-zA-Z]+"); + + public static boolean isWord(final String str) { + return WORD_PATTERN.matcher(str).matches(); + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/model/ArticleWord.java b/vipra-util/src/main/java/de/vipra/util/model/ArticleWord.java index 5020ef236b1d63c04567518d30d8101585def97d..e3ee8969a5cf990f3fd2c9a7642c0c0b6e87321d 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/ArticleWord.java +++ b/vipra-util/src/main/java/de/vipra/util/model/ArticleWord.java @@ -39,6 +39,10 @@ public class ArticleWord implements Comparable<ArticleWord>, Serializable { this.count = count; } + public String aTag(final String word) { + return "<a data-word=\"" + this.word + "\">" + word + "</a>"; + } + @Override public int compareTo(final ArticleWord o) { return count.compareTo(o.getCount()); @@ -49,4 +53,29 @@ public class ArticleWord implements Comparable<ArticleWord>, Serializable { return "ArticleWord [word=" + word + ", count=" + count + "]"; } + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((word == null) ? 0 : word.hashCode()); + return result; + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + final ArticleWord other = (ArticleWord) obj; + if (word == null) { + if (other.word != null) + return false; + } else if (!word.equals(other.word)) + return false; + return true; + } + } diff --git a/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java b/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java index 3a0be98c7dc6eadfe467182205d512927b43e731..80f1cdb2ba54ace0a044d3d0cfa4bcb4821d98a7 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TextEntity.java @@ -8,6 +8,8 @@ import org.mongodb.morphia.annotations.Embedded; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import de.vipra.util.StringUtils; + @JsonIgnoreProperties(ignoreUnknown = true) @Embedded public class TextEntity implements Serializable { @@ -18,6 +20,8 @@ public class TextEntity implements Serializable { private Boolean isHypernym; + private Boolean isWord; + private String url; private List<String> types; @@ -35,6 +39,7 @@ public class TextEntity implements Serializable { public void setEntity(final String entity) { this.entity = entity; + isWord = StringUtils.isWord(entity); } public Boolean getIsHypernym() { @@ -45,6 +50,14 @@ public class TextEntity implements Serializable { this.isHypernym = isHypernym; } + public Boolean getIsWord() { + return isWord; + } + + public void setIsWord(final Boolean isWord) { + this.isWord = isWord; + } + public String getUrl() { return url; } diff --git a/vipra-util/src/main/java/de/vipra/util/model/TextEntityFull.java b/vipra-util/src/main/java/de/vipra/util/model/TextEntityFull.java index 2b5c7d729027b4de8046e20fb1e1a8b97ee70a88..776829e3e012ac6d5df477c6613953bfbc9a92f7 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TextEntityFull.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TextEntityFull.java @@ -16,6 +16,7 @@ import org.mongodb.morphia.annotations.Reference; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import de.vipra.util.Constants; +import de.vipra.util.StringUtils; import de.vipra.util.an.QueryIgnore; @JsonIgnoreProperties(ignoreUnknown = true) @@ -37,6 +38,9 @@ public class TextEntityFull implements Model<String>, Serializable, Comparable<T @QueryIgnore(multi = true) private Boolean isHypernym; + @QueryIgnore(multi = true) + private Boolean isWord; + @Reference @QueryIgnore(multi = true) private TopicModel topicModel; @@ -59,6 +63,7 @@ public class TextEntityFull implements Model<String>, Serializable, Comparable<T url = textEntity.getUrl(); types = textEntity.getTypes(); isHypernym = textEntity.getIsHypernym(); + isWord = StringUtils.isWord(entity); this.topicModel = topicModel; id = entity.toLowerCase() + "-" + topicModel.getId(); } @@ -79,6 +84,7 @@ public class TextEntityFull implements Model<String>, Serializable, Comparable<T public void setEntity(final String entity) { this.entity = entity; + isWord = StringUtils.isWord(entity); } public String getDescription() { @@ -97,6 +103,14 @@ public class TextEntityFull implements Model<String>, Serializable, Comparable<T this.isHypernym = isHypernym; } + public Boolean getIsWord() { + return isWord; + } + + public void setIsWord(final Boolean isWord) { + this.isWord = isWord; + } + public TopicModel getTopicModel() { return topicModel; } @@ -142,7 +156,7 @@ public class TextEntityFull implements Model<String>, Serializable, Comparable<T } public String aTag(final String entity) { - return "<a href=\"" + url + "\" data-entity=\"" + this.entity + "\">" + entity + "</a>"; + return "<a data-entity=\"" + this.entity + "\">" + entity + "</a>"; } public String realEntity() { @@ -191,6 +205,8 @@ public class TextEntityFull implements Model<String>, Serializable, Comparable<T public void prePersist() { if (isHypernym == null) isHypernym = false; + if (isWord == null) + isWord = false; modified = new Date(); if (created == null) created = modified; diff --git a/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java b/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java index 4354c0243fadecc6fb16a25e72e70d15e159702e..a6c8ba788e475806b387f8b8a82ed3a986d83cb8 100644 --- a/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java +++ b/vipra-util/src/main/java/de/vipra/util/model/TopicModelConfig.java @@ -47,6 +47,7 @@ public class TopicModelConfig implements Serializable { private boolean processorUseEntities = Constants.PROCESSOR_USE_ENTITIES; private boolean processorUseEntityTypes = Constants.PROCESSOR_USE_ENTITY_TYPES; private boolean processorUseHypernyms = Constants.PROCESSOR_USE_HYPERNYMS; + private boolean queryEntityDescriptions = Constants.QUERY_ENTITY_DESCRIPTIONS; private WindowResolution windowResolution = Constants.WINDOW_RESOLUTION; public TopicModelConfig() {} @@ -73,6 +74,7 @@ public class TopicModelConfig implements Serializable { processorUseEntities = topicModelConfig.isProcessorUseEntities(); processorUseEntityTypes = topicModelConfig.isProcessorUseEntityTypes(); processorUseHypernyms = topicModelConfig.isProcessorUseHypernyms(); + queryEntityDescriptions = topicModelConfig.isQueryEntityDescriptions(); windowResolution = topicModelConfig.getWindowResolution(); } @@ -260,6 +262,14 @@ public class TopicModelConfig implements Serializable { this.processorUseHypernyms = processorUseHypernyms; } + public boolean isQueryEntityDescriptions() { + return queryEntityDescriptions; + } + + public void setQueryEntityDescriptions(final boolean queryEntityDescriptions) { + this.queryEntityDescriptions = queryEntityDescriptions; + } + public WindowResolution getWindowResolution() { return windowResolution; } @@ -298,8 +308,10 @@ public class TopicModelConfig implements Serializable { + "\n documentMinimumWordFrequency: " + documentMinimumWordFrequency + "\n spotlightSupport: " + spotlightSupport + "\n spotlightConfidence: " + spotlightConfidence + "\n minTopicShare: " + minTopicShare + "\n minRelativeProbability: " + minRelativeProbability + "\n risingDecayLambda: " + risingDecayLambda + "\n maxSimilarDocumentsDivergence: " - + maxSimilarDocumentsDivergence + "\n maxSimilarTopicsDivergence: " + maxSimilarTopicsDivergence + "\n windowResolution: " - + windowResolution; + + maxSimilarDocumentsDivergence + "\n maxSimilarTopicsDivergence: " + maxSimilarTopicsDivergence + "\n processor use text: " + + processorUseText + "\n processor use entities: " + processorUseEntities + "\n processor use entity types: " + + processorUseEntityTypes + "\n processor use hypernyms: " + processorUseHypernyms + "\n query entity descriptions: " + + queryEntityDescriptions + "\n windowResolution: " + windowResolution; } }