Skip to content
Snippets Groups Projects
Commit 54773273 authored by Maria Hartmann's avatar Maria Hartmann
Browse files

nr. 2,3

parent 432f4acc
No related branches found
No related tags found
No related merge requests found
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.sql.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Scanner;
import java.util.regex.Matcher;
import javax.naming.spi.DirStateFactory.Result;
/**
 * Value object for the "tweet" entity.
 *
 * All fields are set once in the constructor and exposed through getters only.
 */
class Tweet {

    private final String original_author;
    private final String tweet_text;
    private final String tweet_time;
    private final int tweet_ID;
    private final int retweet_count;
    private final int favorite_count;

    /**
     * @param original_author author the tweet is attributed to
     * @param tweet_text      text of the tweet
     * @param tweet_time      time of the tweet, kept as the raw string
     * @param tweet_ID        numeric ID of the tweet
     * @param retweet_count   number of retweets
     * @param favorite_count  number of favorites
     */
    public Tweet(String original_author, String tweet_text, String tweet_time,
                 int tweet_ID, int retweet_count, int favorite_count) {
        this.original_author = original_author;
        this.tweet_text = tweet_text;
        this.tweet_time = tweet_time;
        this.tweet_ID = tweet_ID;
        this.retweet_count = retweet_count;
        this.favorite_count = favorite_count;
    }

    /** @return author the tweet is attributed to */
    public String get_original_author() {
        return original_author;
    }

    /** @return text of the tweet */
    public String get_tweet_text() {
        return tweet_text;
    }

    /** @return time of the tweet as the raw string passed to the constructor */
    public String get_tweet_time() {
        return tweet_time;
    }

    /** @return numeric ID of the tweet */
    public int get_tweet_ID() {
        return tweet_ID;
    }

    /** @return number of retweets */
    public int get_retweet_count() {
        return retweet_count;
    }

    /** @return number of favorites */
    public int get_favorite_count() {
        return favorite_count;
    }
}
/**
 * Value object for the "hashtag" entity of the ER model: a numeric ID plus the hashtag text.
 */
class Hashtag {

    private final int hashtag_ID;
    private final String hashtag_text;

    /**
     * @param hashtag_ID   numeric ID of the hashtag
     * @param hashtag_text text of the hashtag
     */
    public Hashtag(int hashtag_ID, String hashtag_text) {
        this.hashtag_text = hashtag_text;
        this.hashtag_ID = hashtag_ID;
    }

    /** @return numeric ID of the hashtag */
    public int get_hashtag_ID() {
        return hashtag_ID;
    }

    /** @return text of the hashtag */
    public String get_hashtag_text() {
        return hashtag_text;
    }
}
/**
 * Value object for the "user" entity of the ER model: a numeric ID plus the account handle.
 */
class User {

    private final int user_id;
    private final String handle;

    /**
     * @param user_id numeric ID of the user
     * @param handle  account handle of the user
     */
    public User(int user_id, String handle) {
        this.handle = handle;
        this.user_id = user_id;
    }

    /** @return account handle of the user */
    public String get_handle() {
        return handle;
    }

    /** @return numeric ID of the user */
    public int get_user_id() {
        return user_id;
    }
}
/**
 * Link record for the user-tweet relation of the ER model: pairs a user ID with a tweet ID.
 */
class user2tweet {

    private final int user_id;
    private final int tweet_id;

    /**
     * @param user_id  ID of the user
     * @param tweet_id ID of the tweet
     */
    public user2tweet(int user_id, int tweet_id) {
        this.tweet_id = tweet_id;
        this.user_id = user_id;
    }

    /** @return ID of the user side of the relation */
    public int get_user_id() {
        return user_id;
    }

    /** @return ID of the tweet side of the relation */
    public int get_tweet_id() {
        return tweet_id;
    }
}
/**
 * Link record for the tweet-hashtag relation: pairs a tweet ID with a hashtag ID.
 */
class tweet2hashtag {

    private final int tweet_id;
    private final int hashtag_id;

    /**
     * @param tweet_id   ID of the tweet
     * @param hashtag_id ID of the hashtag
     */
    public tweet2hashtag(int tweet_id, int hashtag_id) {
        this.hashtag_id = hashtag_id;
        this.tweet_id = tweet_id;
    }

    /** @return ID of the tweet side of the relation */
    public int get_tweet_id() {
        return tweet_id;
    }

    /** @return ID of the hashtag side of the relation */
    public int get_hashtag_id() {
        return hashtag_id;
    }
}
/**
 * Link record pairing the ID of an original tweet with the ID of its copy (the retweet).
 *
 * The two fields are package-visible and mutable, exactly as before.
 */
class tweet2Retweet {

    int tweet_id_Original;
    int tweet_id_Copy;

    /**
     * @param tweet_id_Original ID of the original tweet
     * @param tweet_id_Copy     ID of the copy
     */
    public tweet2Retweet(int tweet_id_Original, int tweet_id_Copy) {
        this.tweet_id_Copy = tweet_id_Copy;
        this.tweet_id_Original = tweet_id_Original;
    }

    /** @return ID of the original tweet */
    public int get_tweet_id_Original() {
        return tweet_id_Original;
    }

    /** @return ID of the copy */
    public int get_tweet_id_Copy() {
        return tweet_id_Copy;
    }
}
//#############################################################################################################################################
public class Clean {
// Tweets parsed by readCSV, one Tweet per complete CSV record.
public ArrayList<Tweet> tweetList=new ArrayList<Tweet>();
// Hashtag objects built by readCSV from hashtagList_entire; the ID is the index in the sorted list.
public ArrayList<Hashtag> HashtagList=new ArrayList<Hashtag>();
// User objects built by readCSV from userList; the ID is the index in that list.
public ArrayList<User> tweetUserList=new ArrayList<User>();
// First CSV column (the handle) of every record; readCSV removes duplicates from it.
public ArrayList<String> userList=new ArrayList<String>();
// Hashtag strings collected over all tweets (filled through concati, then de-duplicated and sorted by readCSV).
public ArrayList<String> hashtagList_entire=new ArrayList<String> ();
/**
 * @return the internal list of parsed tweets (the live list, not a copy)
 */
public ArrayList<Tweet> get_tweetList() {
    return tweetList;
}
/**
 * @return the internal list of Hashtag objects (the live list, not a copy)
 */
public ArrayList<Hashtag> get_HashtagList() {
    return HashtagList;
}
/**
 * @return the internal list of User objects (the live list, not a copy)
 */
public ArrayList<User> get_tweetUserList() {
    return tweetUserList;
}
/**
 * Prints every stored tweet to standard output, one per line, as "ID text".
 */
public void printTweet() {
    for (Tweet current : this.tweetList) {
        String line = current.get_tweet_ID() + " " + current.get_tweet_text();
        System.out.println(line);
    }
}
//#####################################################################################################################################
/**
 * Prints the content of a text file to standard output, skipping the first (header) line.
 *
 * Every remaining line is printed as "counter line", where the counter starts at 0.
 * An empty file prints nothing. If the file cannot be opened, the stack trace is
 * printed and the method returns normally.
 *
 * @param xfileLocation path of the file to read
 */
public static void justRead(String xfileLocation) {
    int counter = 0; // number of data lines printed so far (header excluded)
    // try-with-resources closes the scanner (and the underlying reader) on every exit path
    try (Scanner scanIn = new Scanner(new BufferedReader(new FileReader(xfileLocation)))) {
        // Skip the complete header line. Scanner.next() would only consume its first
        // whitespace-delimited token and would throw on an empty file.
        if (scanIn.hasNextLine()) {
            scanIn.nextLine();
        }
        while (scanIn.hasNextLine()) {
            String inputLine = scanIn.nextLine();
            System.out.println(counter + " " + inputLine);
            counter++;
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
}// end of justRead
//############################################################################################################################################################
/**
 * Returns a new list holding the elements of {@code hashList} without duplicates.
 *
 * The first occurrence of every element is kept and the original order is preserved.
 * The input list is not modified. A LinkedHashSet does the de-duplication, so the
 * cost is linear in the list size instead of quadratic.
 *
 * @param hashList list that may contain duplicates; must not be null
 * @return a new list with the unique elements in first-seen order
 */
public static ArrayList<String> removeDup(ArrayList<String> hashList) {
    // LinkedHashSet drops repeated elements and iterates in insertion order
    return new ArrayList<String>(new LinkedHashSet<String>(hashList));
}
/**
 * Merges a list of hashtags into hashtagList_entire.
 *
 * Each element is appended only if hashtagList_entire does not contain it yet.
 *
 * @param hashList hashtags to merge in
 */
public void concati(ArrayList<String> hashList) {
    for (String candidate : hashList) {
        boolean alreadyKnown = this.hashtagList_entire.contains(candidate);
        if (!alreadyKnown) {
            this.hashtagList_entire.add(candidate);
        }
    }
}
/**
 * Returns the numeric code of a character (equal to its ASCII value for ASCII characters).
 *
 * @param character character to convert
 * @return the character's numeric value
 */
public static int charToASCII(final char character) {
    // char widens to int implicitly
    return character;
}
/**
 * Tells whether a character is an ASCII letter (A-Z, a-z) or an ASCII digit (0-9).
 *
 * @param chr character to test
 * @return true for ASCII letters and digits, false for everything else
 */
public static boolean checkASCII(char chr) {
    boolean upperCase = chr >= 'A' && chr <= 'Z';
    boolean lowerCase = chr >= 'a' && chr <= 'z';
    boolean digit = chr >= '0' && chr <= '9';
    return upperCase || lowerCase || digit;
}
/**
 * Extracts all hashtags from a string.
 *
 * A hashtag is a '#' followed by at least one character accepted by
 * {@link #checkASCII(char)} (ASCII letters and digits); it ends before the first
 * character that is not accepted, or at the end of the string. The returned strings
 * include the leading '#'. Duplicates are kept, in order of appearance.
 *
 * Compared with the previous implementation:
 * a '#' at the very end of the input no longer throws,
 * a hashtag that directly follows another one or its terminator is no longer
 * skipped (for example "#a #b" or "#a#b"), and
 * a NUL character no longer counts as part of a hashtag.
 *
 * @param input text to scan; null is treated like an empty string
 * @return list of hashtags found, possibly empty, never null
 */
public static ArrayList<String> identifyHash(String input) {
    ArrayList<String> hashList = new ArrayList<String>();
    if (input == null) {
        return hashList;
    }
    int length = input.length();
    int i = 0;
    while (i < length) {
        if (input.charAt(i) != '#') {
            i++;
            continue;
        }
        // advance 'end' over the run of valid hashtag characters after the '#'
        int end = i + 1;
        while (end < length && checkASCII(input.charAt(end))) {
            end++;
        }
        if (end > i + 1) { // at least one valid character followed the '#'
            hashList.add(input.substring(i, end));
        }
        // Resume exactly at the terminating character: it may itself be a '#'.
        i = end;
    }
    return hashList;
}// end of identifyHash
/**
 * Prints every element of the list on its own line to standard output.
 *
 * @param hashList strings to print
 */
public static void displayHashes(ArrayList<String> hashList) {
    for (String entry : hashList) {
        System.out.println(entry);
    }
}// end of displayHashes
/**
 * Sorts the given list in place in DESCENDING natural (String.compareTo) order
 * and returns the same list instance.
 *
 * NOTE(review): the name suggests ascending alphabetical order, but the previous
 * hand-written sort moved the larger element to the front, i.e. it also sorted
 * descending. That order is kept here so existing callers see no change; confirm
 * whether ascending order was intended.
 *
 * @param hashList list to sort; it is modified
 * @return {@code hashList} itself, sorted descending
 */
public ArrayList<String> orderAlpha(ArrayList<String> hashList) {
    // library sort (O(n log n)) replaces the former quadratic swap loop
    Collections.sort(hashList, Collections.<String>reverseOrder());
    return hashList;
}
//############################################################################################################################################################
/**
 * Replaces every semicolon in the string with a comma.
 *
 * All other characters are left untouched. The previous implementation cut off
 * the character in front of each semicolon and threw when the string started
 * with a semicolon.
 *
 * @param str text to clean; must not be null
 * @return the text with each ';' replaced by ','
 */
public static String replaceAllSemi(String str) {
    return str.replace(';', ',');
}// end of replaceAllSemi
//################################################################################################
//
// public static String identifyOriginalAuthor(String tuple){
//
// }// end of identifyOriginalAuthor
//
//################################################################################################
/**
 * Parses a semicolon-separated tweet file and fills the lists of this object.
 *
 * The first line is treated as a header and skipped. Blank lines are ignored.
 * A record is complete when its line ends with ";False" or ";True"; lines that do
 * not end that way are taken as the beginning of a tweet whose text contains line
 * breaks and are joined (with a single space) with the following line(s).
 *
 * Columns read from a complete record (0-based, split on ';'):
 * 0 handle, 1 tweet text, 2 retweet flag ("False" means not a retweet),
 * 3 original author, 4 tweet time, 7 retweet count, 8 favorite count.
 * For non-retweets the handle is stored as the original author.
 *
 * Results:
 * tweetList receives one Tweet per record, with IDs counting up from 1;
 * userList receives the unique handles;
 * hashtagList_entire receives the unique hashtags, sorted;
 * HashtagList and tweetUserList are rebuilt from those two lists, using the
 * list index (starting at 0) as ID.
 *
 * Fixes over the previous version:
 * the list post-processing and the closing of the scanner now happen once after
 * the read loop instead of inside it (closing inside made the next loop check fail);
 * cleaning semicolons out of the tweet text keeps the other columns of the record
 * instead of replacing the whole record with the text;
 * the text boundary is computed per record instead of reusing a stale value;
 * the whole header line is skipped, and an empty file is accepted;
 * the scanner is closed even when parsing fails.
 *
 * @param xfileLocation path of the CSV file
 * @throws FileNotFoundException if the file cannot be opened
 * @throws NumberFormatException if a count column is not an integer
 * @throws ArrayIndexOutOfBoundsException if a record has fewer than nine columns
 */
public void readCSV(String xfileLocation) throws FileNotFoundException {
    int counter = 0;       // running tweet ID; the first tweet gets 1
    String pending = null; // start of a record whose text continues on the next line

    // try-with-resources closes the scanner on every exit path
    try (Scanner scanIn = new Scanner(new BufferedReader(new FileReader(xfileLocation)))) {
        if (scanIn.hasNextLine()) {
            scanIn.nextLine(); // header row
        }
        while (scanIn.hasNextLine()) {
            String inputLine = scanIn.nextLine();
            String trimmed = inputLine.trim();
            if (trimmed.isEmpty() || trimmed.equals("\\n")) {
                continue; // nothing to parse on this line
            }
            if (pending != null) { // continue a record that was split by a line break
                inputLine = pending.concat(" " + inputLine);
                pending = null;
            }
            if (!inputLine.endsWith(";False") && !inputLine.endsWith(";True")) {
                pending = inputLine; // record not complete yet
                continue;
            }

            counter++;
            String[] values = normalizeRecord(inputLine).split(";");

            // non-retweets are attributed to the posting handle, retweets to the original author
            String originalAuthor = values[2].equals("False") ? values[0] : values[3];
            String tweet_text = values[1];
            String tweet_time = values[4];
            int retweet_count = Integer.parseInt(values[7]);
            int favorite_count = Integer.parseInt(values[8]);

            this.tweetList.add(new Tweet(originalAuthor, tweet_text, tweet_time,
                    counter, retweet_count, favorite_count));
            this.userList.add(values[0]);
            concati(removeDup(identifyHash(tweet_text)));
        }
    }

    // Build the Hashtag objects: unique, sorted, ID = position in the sorted list.
    this.hashtagList_entire = removeDup(this.hashtagList_entire);
    Collections.sort(this.hashtagList_entire);
    this.HashtagList.clear(); // rebuilt from scratch so repeated calls do not duplicate entries
    for (int i = 0; i < this.hashtagList_entire.size(); i++) {
        this.HashtagList.add(new Hashtag(i, this.hashtagList_entire.get(i)));
    }

    // Build the User objects: unique handles, ID = position in the list.
    this.userList = removeDup(this.userList);
    this.tweetUserList.clear();
    for (int i = 0; i < this.userList.size(); i++) {
        this.tweetUserList.add(new User(i, this.userList.get(i)));
    }
}//end of readCSV

/**
 * Makes one complete record safe to split on ';'.
 *
 * The tweet text is taken to start after the first ';' and to end at the last
 * ";False;" or ";True;" marker in front of the first ";2016" (start of the time
 * column). Semicolons inside that text are replaced by commas. If the text contains
 * an apostrophe, every apostrophe of the record is doubled, as before.
 * When the boundaries cannot be located the record is returned unchanged.
 *
 * NOTE(review): doubling apostrophes is SQL-literal escaping. Values that are later
 * bound through a PreparedStatement will be stored with the doubled apostrophes;
 * confirm whether this escaping is still wanted.
 */
private static String normalizeRecord(String line) {
    int textStart = line.indexOf(';') + 1;
    int timeStart = line.indexOf(";2016");
    if (timeStart < textStart) {
        return line;
    }
    // head = "<text>;<retweet flag>;<original author>"
    String head = line.substring(textStart, timeStart);
    int textLength = Math.max(head.lastIndexOf(";False;"), head.lastIndexOf(";True;"));
    if (textLength < 0) {
        return line;
    }
    String text = head.substring(0, textLength);
    String result = line;
    if (text.indexOf(';') >= 0) {
        result = line.substring(0, textStart)
                + text.replace(';', ',')
                + line.substring(textStart + textLength);
    }
    if (text.indexOf('\'') >= 0) {
        result = result.replace("'", "''");
    }
    return result;
}
//################################################################################################################################################################
//-----MAIN-----
//################################################################################################################################################################
/**
 * Command-line entry point: parses the tweet CSV, creates the table {@code tweet}
 * in PostgreSQL and inserts every parsed tweet.
 *
 * Changes over the previous version:
 * the connection and all statements are closed when the work is done or fails;
 * a failed connection ends the program with exit status 1 instead of 0;
 * the tweet time is bound as a parameter of the INSERT (cast to timestamp by the
 * database) instead of being written by a second, string-concatenated UPDATE;
 * the CSV path can be given as first command-line argument.
 *
 * NOTE(review): database name, user and password are hard-coded below; consider
 * moving them to configuration.
 *
 * TODO: the tables tweeter_user(id serial primary key, handle text) and
 * hashtags(id serial primary key, hashtag_text text) were drafted but are not
 * created or filled yet. When adding them, use PreparedStatements like the
 * tweet insert below.
 *
 * @param args optional; {@code args[0]} overrides the default CSV location
 * @throws IOException if the CSV file cannot be opened
 */
public static void main(String[] args) throws IOException {
    // first argument wins; without arguments the historic default path is used
    String xfileLocation2 = args.length > 0 ? args[0] : "/home/ubuntumac/Dropbox/TI2/DBS/M.csv";

    Clean database = new Clean();
    database.readCSV(xfileLocation2);
    ArrayList<Tweet> tweetList = database.tweetList;
    ArrayList<User> userList = database.tweetUserList;
    System.out.println(userList.size());

    String dbServer = "localhost";
    String dbPort = "5432";
    String dbName = "Election";
    String dbUser = "postgres";
    String password = "mama";

    Connection conn = null;
    try {
        conn = DriverManager.getConnection("jdbc:postgresql://" + dbServer + ":" + dbPort + "/" + dbName, dbUser, password);
        System.out.println("Connection to database " + dbName + "@" + dbServer + ":" + dbPort + " successfully established.");
    } catch (SQLException sqle) {
        System.out.println("The connection could not be established.");
        sqle.printStackTrace();
        System.exit(1); // non-zero status: the run failed
    }

    String creat_tweet = "CREATE TABLE tweet (id serial NOT NULL," +
            "retweet_count int NOT NULL, " +
            "favorite_count int NOT NULL, " +
            "tweet_text text NOT NULL," +
            "tweet_time timestamp," +
            "original_author text NOT NULL," +
            "PRIMARY KEY (id),"+
            "CONSTRAINT vorgaenger_ID FOREIGN KEY(id) REFERENCES tweet(id))";
    // the time arrives as text and is converted by the database
    String Tweet_SQLNew = "INSERT INTO tweet(id ,retweet_count, favorite_count, tweet_text, tweet_time, original_author )" +
            "VALUES(?,?,?,?,CAST(? AS timestamp),?)";

    // closing the connection last; statements are closed by their own try blocks
    try (Connection openConn = conn) {
        // create the table
        try (Statement createStmt = openConn.createStatement()) {
            createStmt.executeUpdate(creat_tweet);
        }
        // transfer the tweets
        try (PreparedStatement insertTweet = openConn.prepareStatement(Tweet_SQLNew)) {
            for (Tweet tmpTweet : tweetList) {
                insertTweet.setInt(1, tmpTweet.get_tweet_ID());
                insertTweet.setInt(2, tmpTweet.get_retweet_count());
                insertTweet.setInt(3, tmpTweet.get_favorite_count());
                insertTweet.setString(4, tmpTweet.get_tweet_text());
                insertTweet.setString(5, tmpTweet.get_tweet_time());
                insertTweet.setString(6, tmpTweet.get_original_author());
                insertTweet.executeUpdate();
            }
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }// END of database
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment