diff --git a/ReutersReader/.classpath b/ReutersReader/.classpath new file mode 100644 index 0000000000000000000000000000000000000000..149cb3c90688952162d8c347ea8ee4ac2a23e17b --- /dev/null +++ b/ReutersReader/.classpath @@ -0,0 +1,20 @@ +<?xml version="1.0" encoding="UTF-8"?> +<classpath> + <classpathentry kind="src" output="target/classes" path="src"> + <attributes> + <attribute name="optional" value="true"/> + <attribute name="maven.pomderived" value="true"/> + </attributes> + </classpathentry> + <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"> + <attributes> + <attribute name="maven.pomderived" value="true"/> + </attributes> + </classpathentry> + <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"> + <attributes> + <attribute name="maven.pomderived" value="true"/> + </attributes> + </classpathentry> + <classpathentry kind="output" path="target/classes"/> +</classpath> diff --git a/ReutersReader/.project b/ReutersReader/.project new file mode 100644 index 0000000000000000000000000000000000000000..6ec2a3f57c0d22bda67a4bd28b308190e0cd6d98 --- /dev/null +++ b/ReutersReader/.project @@ -0,0 +1,23 @@ +<?xml version="1.0" encoding="UTF-8"?> +<projectDescription> + <name>ReutersReader</name> + <comment></comment> + <projects> + </projects> + <buildSpec> + <buildCommand> + <name>org.eclipse.jdt.core.javabuilder</name> + <arguments> + </arguments> + </buildCommand> + <buildCommand> + <name>org.eclipse.m2e.core.maven2Builder</name> + <arguments> + </arguments> + </buildCommand> + </buildSpec> + <natures> + <nature>org.eclipse.m2e.core.maven2Nature</nature> + <nature>org.eclipse.jdt.core.javanature</nature> + </natures> +</projectDescription> diff --git a/ReutersReader/.settings/org.eclipse.jdt.core.prefs b/ReutersReader/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 0000000000000000000000000000000000000000..672496e107ed4e070613aad776b4df5146306bd4 --- /dev/null +++ b/ReutersReader/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,12 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 +org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve +org.eclipse.jdt.core.compiler.compliance=1.8 +org.eclipse.jdt.core.compiler.debug.lineNumber=generate +org.eclipse.jdt.core.compiler.debug.localVariable=generate +org.eclipse.jdt.core.compiler.debug.sourceFile=generate +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning +org.eclipse.jdt.core.compiler.source=1.8 diff --git a/ReutersReader/.settings/org.eclipse.m2e.core.prefs b/ReutersReader/.settings/org.eclipse.m2e.core.prefs new file mode 100644 index 0000000000000000000000000000000000000000..f897a7f1cb2389f85fe6381425d29f0a9866fb65 --- /dev/null +++ b/ReutersReader/.settings/org.eclipse.m2e.core.prefs @@ -0,0 +1,4 @@ +activeProfiles= +eclipse.preferences.version=1 +resolveWorkspaceProjects=true +version=1 diff --git a/ReutersReader/pom.xml b/ReutersReader/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..0275591a022bb74d6513a36de6297da9739836c4 --- /dev/null +++ b/ReutersReader/pom.xml @@ -0,0 +1,34 @@ +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <groupId>asd</groupId> + <artifactId>asd</artifactId> + <version>0.0.1-SNAPSHOT</version> + <build> + <sourceDirectory>src</sourceDirectory> + <plugins> + <plugin> + <artifactId>maven-compiler-plugin</artifactId> + <version>3.3</version> + <configuration> + <source>1.8</source> + <target>1.8</target> + </configuration> + </plugin> + </plugins> + </build> + <dependencies> + + <!-- Jackson --> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + <version>2.7.0</version> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-annotations</artifactId> + <version>2.7.0</version> + </dependency> + </dependencies> +</project> \ No newline at end of file diff --git a/ReutersReader/src/asd/Main.java b/ReutersReader/src/asd/Main.java new file mode 100644 index 0000000000000000000000000000000000000000..9f72e50002bc9e388ba66b302af08afa4f94688d --- /dev/null +++ b/ReutersReader/src/asd/Main.java @@ -0,0 +1,43 @@ +package asd; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.util.ArrayList; +import java.util.List; + +import com.fasterxml.jackson.core.JsonParser.Feature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; + +import de.fuberlin.inf.agcsw.cer.models.Article; +import de.fuberlin.inf.agcsw.cer.models.ReutersArticleWithFrames; + +public class Main { + + public static final ObjectMapper mapper = new ObjectMapper(); + + static { + mapper.enable(SerializationFeature.INDENT_OUTPUT); + mapper.enable(Feature.ALLOW_COMMENTS); + } + + @SuppressWarnings("unused") + public static void main(String[] args) throws FileNotFoundException, IOException, ClassNotFoundException { + List<ReutersArticleWithFrames> articles = new ArrayList<>(); + + try (ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream( + new FileInputStream("/home/eike/Downloads/articles-annotated-0-0.5-withframes.ser")))) { + articles = (List<ReutersArticleWithFrames>) ois.readObject(); + System.out.println(articles.size()); + List<Article> articles2 = new ArrayList<>(articles.size()); + for (ReutersArticleWithFrames a : articles) + articles2.add(new Article(a)); + mapper.writeValue(new File("/home/eike/Downloads/articles.json"), articles2); + } + } + +} diff --git a/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/Article.java b/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/Article.java new file mode 100644 index 0000000000000000000000000000000000000000..6174326c746a8d829bc62480e46e3bc7998a6448 --- /dev/null +++ b/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/Article.java @@ -0,0 +1,32 @@ +package de.fuberlin.inf.agcsw.cer.models; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; + +import com.fasterxml.jackson.annotation.JsonIgnore; + +public class Article { + + @JsonIgnore + private static DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); + + @JsonIgnore + private ReutersArticleWithFrames article; + + public Article(ReutersArticleWithFrames a) { + article = a; + } + + public String getTitle() { + return article.getTitle(); + } + + public String getText() { + return article.getText(); + } + + public String getDate() { + return DATE_FORMAT.format(article.getDate()); + } + +} diff --git a/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/ReutersArticle.java b/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/ReutersArticle.java new file mode 100644 index 0000000000000000000000000000000000000000..fa2dd2f6b1e504b1f7430a2f8787d822fb73e354 --- /dev/null +++ b/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/ReutersArticle.java @@ -0,0 +1,82 @@ +package de.fuberlin.inf.agcsw.cer.models; + +import java.io.Serializable; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; + +import de.fuberlin.inf.agcsw.dbpedia.annotation.models.SpotlightAnnotation; + +/** + * Created by wojlukas on 2/2/16. + */ +public class ReutersArticle implements Serializable { + private String dateString; + private Date date; + private String title; + private String text; + private String uuid; + private SpotlightAnnotation annotation; + + private static DateFormat DATE_FORMAT = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss.SS"); + + public ReutersArticle() { + } + + public String getDateString() { + return dateString; + } + + public void setDateString(String dateString) { + this.dateString = dateString; + } + + public Date getDate() { + return date; + } + + public void setDate(Date date) { + this.date = date; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getText() { + return text; + } + + public void setText(String text) { + this.text = text; + } + + public void parseDate() { + try { + date = DATE_FORMAT.parse(dateString); + } catch (ParseException e) { + e.printStackTrace(); + } + } + + public String getUuid() { + return uuid; + } + + public void setUuid(String uuid) { + this.uuid = uuid; + } + + public SpotlightAnnotation getAnnotation() { + return annotation; + } + + public void setAnnotation(SpotlightAnnotation annotation) { + this.annotation = annotation; + } +} \ No newline at end of file diff --git a/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/ReutersArticleWithFrames.java b/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/ReutersArticleWithFrames.java new file mode 100644 index 0000000000000000000000000000000000000000..ab117bb56620dbd3c5199163bcbf3b545c533d6b --- /dev/null +++ b/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/ReutersArticleWithFrames.java @@ -0,0 +1,107 @@ +package de.fuberlin.inf.agcsw.cer.models; + +import java.io.Serializable; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; + +import de.fuberlin.inf.agcsw.cer.models.framenet.Frame; +import de.fuberlin.inf.agcsw.dbpedia.annotation.models.SpotlightAnnotation; + +public class ReutersArticleWithFrames implements Serializable { + private String dateString; + private Date date; + private String title; + private String text; + private String uuid; + private SpotlightAnnotation annotation; + private List<Frame> frames = new ArrayList<>(); + + private static DateFormat DATE_FORMAT = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss.SS"); + + public ReutersArticleWithFrames() { + } + + public String getDateString() { + return dateString; + } + + public void setDateString(String dateString) { + this.dateString = dateString; + } + + public Date getDate() { + return date; + } + + public void setDate(Date date) { + this.date = date; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getText() { + return text; + } + + public void setText(String text) { + this.text = text; + } + + public void parseDate() { + try { + date = DATE_FORMAT.parse(dateString); + } catch (ParseException e) { + e.printStackTrace(); + } + } + + public String getUuid() { + return uuid; + } + + public void setUuid(String uuid) { + this.uuid = uuid; + } + + public SpotlightAnnotation getSpotlightAnnotation() { + return annotation; + } + + public void setSpotlightAnnotation(SpotlightAnnotation annotation) { + this.annotation = annotation; + } + + public List<Frame> getFramenetFrames() { + return frames; + } + + public void setFramenetFrames(List<Frame> frames) { + this.frames = frames; + } + + public void addFrames(List<Frame> frames) { + this.frames.addAll(frames); + } + + public static ReutersArticleWithFrames cloneFromOldArticle(ReutersArticle reutersArticle) { + ReutersArticleWithFrames res = new ReutersArticleWithFrames(); + res.setSpotlightAnnotation(reutersArticle.getAnnotation()); + res.setDate(reutersArticle.getDate()); + res.setDateString(reutersArticle.getDateString()); + res.setText(reutersArticle.getText()); + res.setTitle(reutersArticle.getTitle()); + res.setUuid(reutersArticle.getUuid()); + + return res; + } +} diff --git a/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/framenet/AnnotationSet.java b/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/framenet/AnnotationSet.java new file mode 100644 index 0000000000000000000000000000000000000000..e7170104754cfc251b5ce0806de1200b1faaaec8 --- /dev/null +++ b/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/framenet/AnnotationSet.java @@ -0,0 +1,37 @@ +package de.fuberlin.inf.agcsw.cer.models.framenet; + +import java.io.Serializable; +import java.util.List; + +/** + * Created by wojlukas on 3/7/16. + */ +public class AnnotationSet implements Serializable { + private List<FrameElement> frameElements; + private int rank; + private double score; + + public List<FrameElement> getFrameElements() { + return frameElements; + } + + public void setFrameElements(List<FrameElement> frameElements) { + this.frameElements = frameElements; + } + + public int getRank() { + return rank; + } + + public void setRank(int rank) { + this.rank = rank; + } + + public double getScore() { + return score; + } + + public void setScore(double score) { + this.score = score; + } +} diff --git a/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/framenet/Frame.java b/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/framenet/Frame.java new file mode 100644 index 0000000000000000000000000000000000000000..e98870c2029e6ff89a0c00bbe5e7802d7efbedf6 --- /dev/null +++ b/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/framenet/Frame.java @@ -0,0 +1,28 @@ +package de.fuberlin.inf.agcsw.cer.models.framenet; + +import java.io.Serializable; +import java.util.List; + +/** + * Created by wojlukas on 3/7/16. + */ +public class Frame implements Serializable { + private List<AnnotationSet> annotationSets; + private FrameElement target; + + public List<AnnotationSet> getAnnotationSets() { + return annotationSets; + } + + public void setAnnotationSets(List<AnnotationSet> annotationSets) { + this.annotationSets = annotationSets; + } + + public FrameElement getTarget() { + return target; + } + + public void setTarget(FrameElement target) { + this.target = target; + } +} \ No newline at end of file diff --git a/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/framenet/FrameElement.java b/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/framenet/FrameElement.java new file mode 100644 index 0000000000000000000000000000000000000000..5fcf156a97895e0f16a4fc77ffe6823f5b7fd22b --- /dev/null +++ b/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/framenet/FrameElement.java @@ -0,0 +1,28 @@ +package de.fuberlin.inf.agcsw.cer.models.framenet; + +import java.io.Serializable; +import java.util.List; + +/** + * Created by wojlukas on 3/7/16. + */ +public class FrameElement implements Serializable { + private String name; + private List<Span> spans; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public List<Span> getSpans() { + return spans; + } + + public void setSpans(List<Span> spans) { + this.spans = spans; + } +} diff --git a/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/framenet/Span.java b/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/framenet/Span.java new file mode 100644 index 0000000000000000000000000000000000000000..4d287dc5b9a055f1c7427895f2570f54d9e7191f --- /dev/null +++ b/ReutersReader/src/de/fuberlin/inf/agcsw/cer/models/framenet/Span.java @@ -0,0 +1,36 @@ +package de.fuberlin.inf.agcsw.cer.models.framenet; + +import java.io.Serializable; + +/** + * Created by wojlukas on 3/7/16. + */ +public class Span implements Serializable { + private int end; + private int start; + private String text; + + public int getEnd() { + return end; + } + + public void setEnd(int end) { + this.end = end; + } + + public int getStart() { + return start; + } + + public void setStart(int start) { + this.start = start; + } + + public String getText() { + return text; + } + + public void setText(String text) { + this.text = text; + } +} diff --git a/ReutersReader/src/de/fuberlin/inf/agcsw/dbpedia/annotation/models/SpotlightAnnotation.java b/ReutersReader/src/de/fuberlin/inf/agcsw/dbpedia/annotation/models/SpotlightAnnotation.java new file mode 100644 index 0000000000000000000000000000000000000000..84741e82777456189a86ebd8052946cb26166c64 --- /dev/null +++ b/ReutersReader/src/de/fuberlin/inf/agcsw/dbpedia/annotation/models/SpotlightAnnotation.java @@ -0,0 +1,72 @@ +package de.fuberlin.inf.agcsw.dbpedia.annotation.models; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +/** + * Created by wojlukas on 2/1/16. + */ +public class SpotlightAnnotation implements Serializable { + private double confidence; + private int support; + private String types; + private String sparql; + private String policy; + private List<SpotlightResource> resources = new ArrayList<>(); + + public SpotlightAnnotation() { + } + + public double getConfidence() { + return confidence; + } + + public void setConfidence(double confidence) { + this.confidence = confidence; + } + + public int getSupport() { + return support; + } + + public void setSupport(int support) { + this.support = support; + } + + public String getTypes() { + return types; + } + + public void setTypes(String types) { + this.types = types; + } + + public String getSparql() { + return sparql; + } + + public void setSparql(String sparql) { + this.sparql = sparql; + } + + public String getPolicy() { + return policy; + } + + public void setPolicy(String policy) { + this.policy = policy; + } + + public List<SpotlightResource> getResources() { + return resources; + } + + public void setResources(List<SpotlightResource> resources) { + this.resources = resources; + } + + public void addResource(SpotlightResource resource) { + this.resources.add(resource); + } +} \ No newline at end of file diff --git a/ReutersReader/src/de/fuberlin/inf/agcsw/dbpedia/annotation/models/SpotlightResource.java b/ReutersReader/src/de/fuberlin/inf/agcsw/dbpedia/annotation/models/SpotlightResource.java new file mode 100644 index 0000000000000000000000000000000000000000..f3173870d237dad8ee980d2e8ecb6356520abb95 --- /dev/null +++ b/ReutersReader/src/de/fuberlin/inf/agcsw/dbpedia/annotation/models/SpotlightResource.java @@ -0,0 +1,96 @@ +package de.fuberlin.inf.agcsw.dbpedia.annotation.models; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +/** + * Created by wojlukas on 2/1/16. + */ +public class SpotlightResource implements Serializable { + private String uri; + private int support; + private String typesString; + private String surfaceForm; + private int offset; + private double similarityScore; + private float percentageOfSecondRank; + + private List<String> rdfTypes = new ArrayList<>(); + private List<String> dctSubjects = new ArrayList<>(); + + public SpotlightResource() { + } + + public String getUri() { + return uri; + } + + public void setUri(String uri) { + this.uri = uri; + } + + public int getSupport() { + return support; + } + + public void setSupport(int support) { + this.support = support; + } + + public String getTypesString() { + return typesString; + } + + public void setTypesString(String typesString) { + this.typesString = typesString; + } + + public String getSurfaceForm() { + return surfaceForm; + } + + public void setSurfaceForm(String surfaceForm) { + this.surfaceForm = surfaceForm; + } + + public int getOffset() { + return offset; + } + + public void setOffset(int offset) { + this.offset = offset; + } + + public double getSimilarityScore() { + return similarityScore; + } + + public void setSimilarityScore(double similarityScore) { + this.similarityScore = similarityScore; + } + + public float getPercentageOfSecondRank() { + return percentageOfSecondRank; + } + + public void setPercentageOfSecondRank(float percentageOfSecondRank) { + this.percentageOfSecondRank = percentageOfSecondRank; + } + + public List<String> getRdfTypes() { + return rdfTypes; + } + + public void setRdfTypes(List<String> rdfTypes) { + this.rdfTypes = rdfTypes; + } + + public List<String> getDctSubjects() { + return dctSubjects; + } + + public void setDctSubjects(List<String> dctSubjects) { + this.dctSubjects = dctSubjects; + } +} diff --git a/ReutersReader/target/classes/META-INF/MANIFEST.MF b/ReutersReader/target/classes/META-INF/MANIFEST.MF new file mode 100644 index 0000000000000000000000000000000000000000..a49dfaf4eb37161079daf48a0310092d7b3a32b9 --- /dev/null +++ b/ReutersReader/target/classes/META-INF/MANIFEST.MF @@ -0,0 +1,5 @@ +Manifest-Version: 1.0 +Built-By: eike +Build-Jdk: 1.8.0_77 +Created-By: Maven Integration for Eclipse + diff --git a/ReutersReader/target/classes/META-INF/maven/asd/asd/pom.properties b/ReutersReader/target/classes/META-INF/maven/asd/asd/pom.properties new file mode 100644 index 0000000000000000000000000000000000000000..8440fdf3751b0de038815e480e3c376a9e39754f --- /dev/null +++ b/ReutersReader/target/classes/META-INF/maven/asd/asd/pom.properties @@ -0,0 +1,7 @@ +#Generated by Maven Integration for Eclipse +#Sun Apr 10 11:31:57 CEST 2016 +version=0.0.1-SNAPSHOT +groupId=asd +m2e.projectName=ReutersReader +m2e.projectLocation=/home/eike/workspace/ReutersReader +artifactId=asd diff --git a/ReutersReader/target/classes/META-INF/maven/asd/asd/pom.xml b/ReutersReader/target/classes/META-INF/maven/asd/asd/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..0275591a022bb74d6513a36de6297da9739836c4 --- /dev/null +++ b/ReutersReader/target/classes/META-INF/maven/asd/asd/pom.xml @@ -0,0 +1,34 @@ +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <groupId>asd</groupId> + <artifactId>asd</artifactId> + <version>0.0.1-SNAPSHOT</version> + <build> + <sourceDirectory>src</sourceDirectory> + <plugins> + <plugin> + <artifactId>maven-compiler-plugin</artifactId> + <version>3.3</version> + <configuration> + <source>1.8</source> + <target>1.8</target> + </configuration> + </plugin> + </plugins> + </build> + <dependencies> + + <!-- Jackson --> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + <version>2.7.0</version> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-annotations</artifactId> + <version>2.7.0</version> + </dependency> + </dependencies> +</project> \ No newline at end of file