Skip to content
Snippets Groups Projects
Select Git revision
  • 54773273930401abd41729aee579ba2d85763bc9
  • master default protected
2 results

Clean_commented.java

  • user avatar
    Maria Hartmann authored
    54773273
    History
    Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    Clean_commented.java 19.64 KiB
    
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.LinkedHashSet;
    import java.util.Scanner;
    import java.util.regex.Matcher;
    
    import java.sql.*;
    
    import javax.naming.spi.DirStateFactory.Result;
    
    
    
    /**
     * Value object for the "tweet" entity of the ER model.
     * <p>
     * Every field is assigned exactly once in the constructor and is only
     * exposed through a getter, so an instance never changes after it has
     * been created.
     */
    class Tweet {
    	/** Handle of the author of the original tweet. */
    	private final String original_author;
    	/** Text of the tweet. */
    	private final String tweet_text;
    	/** Time of the tweet, kept as the unparsed string it was created with. */
    	private final String tweet_time;
    	/** Numeric ID of the tweet. */
    	private final int tweet_ID;
    	private final int retweet_count;
    	private final int favorite_count;

    	/**
    	 * Creates a tweet from its six attributes; the values are stored as given,
    	 * without validation.
    	 */
    	public Tweet(String original_author,
    					String tweet_text,
    					String tweet_time,
    					int tweet_ID,
    					int retweet_count,
    					int favorite_count){
    		this.original_author=original_author;
    		this.tweet_text=tweet_text;
    		this.tweet_time=tweet_time;
    		this.tweet_ID=tweet_ID;
    		this.retweet_count=retweet_count;
    		this.favorite_count=favorite_count;
    	}

    	/** @return the handle of the original author */
    	public String get_original_author(){
    		return original_author;
    	}

    	/** @return the tweet text */
    	public String get_tweet_text(){
    		return tweet_text;
    	}

    	/** @return the tweet time as the string passed to the constructor */
    	public String get_tweet_time(){
    		return tweet_time;
    	}

    	/** @return the tweet ID */
    	public int get_tweet_ID(){
    		return tweet_ID;
    	}

    	/** @return the retweet count */
    	public int get_retweet_count(){
    		return retweet_count;
    	}

    	/** @return the favorite count */
    	public int get_favorite_count(){
    		return favorite_count;
    	}
    }// end of class Tweet
    
    /**
     * Value object for the "hashtag" entity of the ER model: a numeric ID
     * paired with the hashtag text. Both fields are set once in the
     * constructor and never change afterwards.
     */
    class Hashtag {
    	private final int hashtag_ID;
    	private final String hashtag_text;

    	/**
    	 * @param hashtag_ID   numeric ID of the hashtag
    	 * @param hashtag_text text of the hashtag, stored as given
    	 */
    	public Hashtag(int hashtag_ID,String hashtag_text){
    		this.hashtag_ID=hashtag_ID;
    		this.hashtag_text=hashtag_text;
    	}

    	/** @return the hashtag ID */
    	public int get_hashtag_ID(){
    		return hashtag_ID;
    	}

    	/** @return the hashtag text */
    	public String get_hashtag_text(){
    		return hashtag_text;
    	}
    }// end of class Hashtag
    
    
    /**
     * Value object for the "user" entity of the ER model: a numeric ID
     * paired with the user's handle. Both fields are set once in the
     * constructor and never change afterwards.
     */
    class User {
    	private final String handle;
    	private final int user_id;

    	/**
    	 * @param user_id numeric ID of the user
    	 * @param handle  handle of the user, stored as given
    	 */
    	public User(int user_id, String handle){
    		this.user_id=user_id;
    		this.handle=handle;
    	}

    	/** @return the user's handle */
    	public String get_handle(){
    		return handle;
    	}

    	/** @return the user ID */
    	public int get_user_id(){
    		return user_id;
    	}
    }// end of class User
    
    
    /**
     * Value object for the user-tweet relation of the ER model: links one
     * user ID to one tweet ID. Both IDs are set once in the constructor and
     * never change afterwards.
     */
    class user2tweet {
    	private final int user_id;
    	private final int tweet_id;

    	/**
    	 * @param user_id  ID of the user
    	 * @param tweet_id ID of the tweet
    	 */
    	public user2tweet(int user_id, int tweet_id){
    		this.user_id=user_id;
    		this.tweet_id=tweet_id;
    	}

    	/** @return the user ID of this relation */
    	public int get_user_id(){
    		return user_id;
    	}

    	/** @return the tweet ID of this relation */
    	public int get_tweet_id(){
    		return tweet_id;
    	}
    }// end of class user2tweet
    
    /**
     * Value object for the tweet-hashtag relation of the ER model: links one
     * tweet ID to one hashtag ID. Both IDs are set once in the constructor
     * and never change afterwards.
     */
    class tweet2hashtag {
    	private final int tweet_id;
    	private final int hashtag_id;

    	/**
    	 * @param tweet_id   ID of the tweet
    	 * @param hashtag_id ID of the hashtag
    	 */
    	public tweet2hashtag(int tweet_id, int hashtag_id){
    		this.tweet_id=tweet_id;
    		this.hashtag_id=hashtag_id;
    	}

    	/** @return the tweet ID of this relation */
    	public int get_tweet_id(){
    		return tweet_id;
    	}

    	/** @return the hashtag ID of this relation */
    	public int get_hashtag_id(){
    		return hashtag_id;
    	}
    }// end of class tweet2hashtag
    
    
    /**
     * Pair of tweet IDs: the ID stored as "original" and the ID stored as
     * "copy". The fields are package-visible and can also be read through
     * the getters.
     */
    class tweet2Retweet{
    	int tweet_id_Original;
    	int tweet_id_Copy;

    	/**
    	 * @param tweet_id_Original ID stored as the original tweet
    	 * @param tweet_id_Copy     ID stored as the copy
    	 */
    	public tweet2Retweet(int tweet_id_Original,int tweet_id_Copy){
    		this.tweet_id_Copy=tweet_id_Copy;
    		this.tweet_id_Original=tweet_id_Original;
    	}

    	/** @return the ID stored as the original tweet */
    	public int get_tweet_id_Original() {
    		return tweet_id_Original;
    	}

    	/** @return the ID stored as the copy */
    	public int get_tweet_id_Copy() {
    		return tweet_id_Copy;
    	}
    }// end of class tweet2Retweet
    
    //#############################################################################################################################################	
    
    public class Clean {
    	
    	
    	// Tweet instances created by readCSV, one per parsed record.
    	public ArrayList<Tweet> tweetList=new ArrayList<Tweet>();
    	// Hashtag instances (ID + text) that readCSV builds from hashtagList_entire.
    	public ArrayList<Hashtag> HashtagList=new ArrayList<Hashtag>();
    	// User instances (ID + handle) that readCSV builds from userList.
    	public ArrayList<User> tweetUserList=new ArrayList<User>();
    	// Handles collected by readCSV (first field of each record).
    	public ArrayList<String> userList=new ArrayList<String>();
    	// Hashtag strings collected over all tweets; concati only adds strings that are not yet contained.
    	public ArrayList<String> hashtagList_entire=new ArrayList<String> ();
    	
    	
    	/** @return the internal tweet list itself (not a copy) */
    	public ArrayList<Tweet> get_tweetList(){
    		return tweetList;
    	}
    	
    	/** @return the internal hashtag list itself (not a copy) */
    	public ArrayList<Hashtag> get_HashtagList(){
    		return HashtagList;
    	}
    	
    	/** @return the internal user list itself (not a copy) */
    	public ArrayList<User> get_tweetUserList(){
    		return tweetUserList;
    	}
    	
    	/**
    	 * Prints one line per stored tweet to standard output: the tweet ID,
    	 * two spaces, and the tweet text.
    	 */
    	public void printTweet(){
    		for(Tweet current : tweetList){
    			String line=current.get_tweet_ID()+"  "+current.get_tweet_text();
    			System.out.println(line);
    		}
    	}
    //#####################################################################################################################################		
    	/**
    	 * Prints the content of a text file to standard output, one numbered
    	 * line per input line. The first line of the file is treated as a header
    	 * and is not printed; the remaining lines are numbered starting at 0 and
    	 * printed as {@code <number>  <line>}.
    	 * <p>
    	 * If the file cannot be opened, the stack trace is printed and the
    	 * method returns normally.
    	 *
    	 * @param xfileLocation path of the file to read
    	 */
    	public static void justRead(String xfileLocation){
    		// try-with-resources closes the scanner (and the underlying reader) on every path
    		try (Scanner scanIn=new Scanner (new BufferedReader(new FileReader(xfileLocation)))) {
    			// Skip the complete header line. next() would only skip the first
    			// whitespace-delimited token and would throw on an empty file.
    			if(scanIn.hasNextLine()) {
    				scanIn.nextLine();
    			}
    			int counter=0;// number of lines printed so far (header excluded)
    			while (scanIn.hasNextLine()) {
    				System.out.println(counter+"  "+scanIn.nextLine());
    				counter++;
    			}
    		} catch (FileNotFoundException e) {
    			e.printStackTrace();
    		}
    	}// end of justRead
    //############################################################################################################################################################	
    		
    	/**
    	 * Returns a new list that contains every distinct string of
    	 * {@code hashList} exactly once, in the order of first occurrence.
    	 * The input list is not modified.
    	 *
    	 * @param hashList list that may contain duplicates
    	 * @return a new list without duplicates
    	 */
    	public static ArrayList<String> removeDup(ArrayList<String> hashList){
    		// A LinkedHashSet drops duplicates while keeping insertion order; this
    		// avoids the linear contains() scan per element of a list-based check.
    		return new ArrayList<String>(new LinkedHashSet<String>(hashList));
    	}
    	
    	/**
    	 * Appends to {@code hashtagList_entire} every string of {@code hashList}
    	 * that it does not contain yet, keeping the order of {@code hashList}.
    	 *
    	 * @param hashList strings to merge into the overall hashtag list
    	 */
    	public void concati(ArrayList<String> hashList) {
    		for(String candidate : hashList){
    			boolean known=hashtagList_entire.contains(candidate);
    			if(!known){
    				hashtagList_entire.add(candidate);
    			}
    		}
    	}
    	
    	/**
    	 * Returns the numeric value of a character (identical to its ASCII code
    	 * for ASCII characters).
    	 */
    	public static int charToASCII(final char character) {
    		int code=character;// implicit widening char -> int
    		return code;
    	}
    	
    	/**
    	 * Tells whether a character may be part of a hashtag.
    	 *
    	 * @param chr character to test
    	 * @return {@code true} for ASCII letters (A-Z, a-z) and digits (0-9),
    	 *         {@code false} for every other character
    	 */
    	public static boolean checkASCII(char chr) {
    		return (chr>='A' && chr<='Z')
    				|| (chr>='a' && chr<='z')
    				|| (chr>='0' && chr<='9');
    	}

    	/**
    	 * Extracts all hashtags from a string.
    	 * <p>
    	 * A hashtag is a '#' followed by at least one character accepted by
    	 * {@link #checkASCII(char)}; it ends before the first character that is
    	 * not accepted or at the end of the string. The returned strings include
    	 * the leading '#', appear in the order they occur in {@code input}, and
    	 * may contain duplicates.
    	 *
    	 * @param input text to scan
    	 * @return list of the hashtags found, empty if there are none
    	 */
    	public static ArrayList<String> identifyHash(String input) {
    		ArrayList<String> hashList=new ArrayList<String>();
    		int length=input.length();
    		int i=0;
    		while(i<length) {
    			if(input.charAt(i)!='#') {
    				i++;
    				continue;
    			}
    			// advance 'end' to the first character that no longer belongs to the hashtag
    			int end=i+1;
    			while(end<length && checkASCII(input.charAt(end))) {
    				end++;
    			}
    			if(end>i+1) {// a lone '#' (also as last character) is not a hashtag
    				hashList.add(input.substring(i,end));
    			}
    			// Continue AT the terminating character: it may itself be a '#'
    			// starting the next hashtag ("#a#b"), and the character after it
    			// may be one as well ("#a #b").
    			i=end;
    		}
    		return hashList;
    	}// end of identifyHash
    	
    	/**
    	 * Prints every entry of the given list on its own line to standard output.
    	 *
    	 * @param hashList strings to print
    	 */
    	public static void displayHashes(ArrayList<String> hashList) {
    		for(String entry : hashList){
    			System.out.println(entry);
    		}
    	}// end of displayHashes
    	
    	/**
    	 * Sorts the given list in place in DESCENDING natural
    	 * ({@link String#compareTo}) order and returns it.
    	 * <p>
    	 * NOTE(review): the method name suggests ascending alphabetical order,
    	 * but the implementation has always produced descending order; that
    	 * behavior is kept here. Confirm which order callers expect.
    	 *
    	 * @param hashList list to sort; it is modified
    	 * @return the same list instance, sorted descending
    	 */
    	public ArrayList<String>  orderAlpha(ArrayList<String> hashList) {
    		// library sort instead of a hand-written quadratic exchange sort
    		Collections.sort(hashList, Collections.<String>reverseOrder());
    		return hashList;
    	}
    		
    //############################################################################################################################################################	
    		
    	
    	
    	
    	/**
    	 * Replaces every semicolon in the given string with a comma. All other
    	 * characters, and the length of the string, stay unchanged.
    	 *
    	 * @param str text that may contain semicolons
    	 * @return the text with each ';' replaced by ','
    	 */
    	public static String replaceAllSemi(String str) {
    		// String.replace swaps the character in place. The former manual
    		// substring(0,i-1) variant also removed the character in front of each
    		// ';' and threw an exception for a ';' at index 0.
    		return str.replace(';', ',');
    	}// end of replaceAllSemi
    
    //################################################################################################	
    //	
    //	public static String identifyOriginalAuthor(String tuple){
    //		
    //	}// end of identifyOriginalAuthor
    //	
    //################################################################################################	
    	/**
    	 * Reads a semicolon-separated tweet file and fills the lists of this
    	 * instance.
    	 * <p>
    	 * Input format as used by this method:
    	 * <ul>
    	 * <li>The first non-blank line is a header and is skipped; blank lines
    	 *     are ignored everywhere.</li>
    	 * <li>A record ends with {@code ;False} or {@code ;True}. A line that
    	 *     does not end that way is treated as the beginning of a record whose
    	 *     text continues on the next line; the parts are joined with a single
    	 *     space.</li>
    	 * <li>Fields by index: 0 handle, 1 tweet text, 2 retweet flag
    	 *     ({@code False}/{@code True}), 3 original author, 4 time,
    	 *     7 retweet count, 8 favorite count. The time field is located by
    	 *     searching for {@code ;2016}, so it has to start with "2016".</li>
    	 * </ul>
    	 * Semicolons inside the tweet text are replaced by commas. If the text
    	 * contains an apostrophe, every apostrophe of the record is doubled.
    	 * NOTE(review): the doubling looks like SQL escaping for string-built
    	 * statements; with a PreparedStatement the doubled apostrophes are stored
    	 * literally - confirm whether it is still wanted.
    	 * <p>
    	 * For every valid record a {@link Tweet} with a running ID (starting at
    	 * 1) is added to {@code tweetList}. For retweets the original author is
    	 * taken from field 3, otherwise from the handle. Records that cannot be
    	 * split as described, or whose counts are not integers, are reported on
    	 * standard error and skipped. After the whole file has been read,
    	 * {@code HashtagList} (sorted by text, IDs from 0) and
    	 * {@code tweetUserList} (order of first occurrence, IDs from 0) are
    	 * rebuilt from the collected hashtags and handles.
    	 * <p>
    	 * Tweet IDs restart at 1 on every call, so the method is meant to be
    	 * called once per instance.
    	 *
    	 * @param xfileLocation path of the file to read
    	 * @throws FileNotFoundException if the file cannot be opened
    	 */
    	public void readCSV(String xfileLocation) throws FileNotFoundException{
    		String pending=null;// beginning of a record that continues on the next line
    		int counter=0;// running tweet ID

    		// try-with-resources closes the scanner once, AFTER the whole file was read
    		try (Scanner scanIn=new Scanner (new BufferedReader(new FileReader(xfileLocation)))) {
    			// Skip leading blank lines and then the header: the loop stops right
    			// after it has consumed the first non-blank line.
    			while(scanIn.hasNextLine() && scanIn.nextLine().trim().isEmpty()) {
    				// nothing to do, the condition does the skipping
    			}

    			while (scanIn.hasNextLine()){
    				String inputLine=scanIn.nextLine();
    				String trimmed=inputLine.trim();
    				if(trimmed.isEmpty() || trimmed.equals("\\n")) {
    					continue;
    				}
    				if(pending!=null) {// complete a record that was split across lines
    					inputLine=pending.concat(" "+inputLine);
    					pending=null;
    				}
    				if(!(inputLine.endsWith(";False")||inputLine.endsWith(";True"))){
    					pending=inputLine;// record is not complete yet
    					continue;
    				}

    				String[] values=splitRecord(inputLine);
    				if(values==null) {
    					System.err.println("Skipping malformed record: "+inputLine);
    					continue;
    				}
    				int retweet_count;
    				int favorite_count;
    				try {
    					retweet_count=Integer.parseInt(values[7]);
    					favorite_count=Integer.parseInt(values[8]);
    				} catch (NumberFormatException e) {
    					System.err.println("Skipping record with non-numeric counts: "+inputLine);
    					continue;
    				}

    				counter++;// only valid records receive an ID
    				// a tweet that is no retweet was written by the account itself
    				String originalAuthor=values[2].equals("False") ? values[0] : values[3];
    				String tweet_text=values[1];
    				String tweet_time=values[4];

    				this.tweetList.add(new Tweet (originalAuthor, tweet_text, tweet_time, counter, retweet_count, favorite_count));
    				this.userList.add(values[0]);
    				concati(removeDup(identifyHash(tweet_text)));// collect the hashtags of this tweet
    			}
    		}

    		// ---- hashtag instances: unique, sorted by text, ID = position ----
    		this.hashtagList_entire=removeDup(this.hashtagList_entire);
    		Collections.sort(this.hashtagList_entire);
    		this.HashtagList.clear();// rebuilt from scratch so no entry appears twice
    		for(int i=0; i<this.hashtagList_entire.size();i++) {
    			this.HashtagList.add(new Hashtag(i, this.hashtagList_entire.get(i)));
    		}

    		// ---- user instances: unique handles, ID = position ----
    		this.userList=removeDup(this.userList);
    		this.tweetUserList.clear();
    		for(int i=0; i<this.userList.size(); i++) {
    			this.tweetUserList.add(new User(i, this.userList.get(i)));
    		}
    	}//end of readCSV

    	/**
    	 * Cleans one complete record and splits it into its fields.
    	 *
    	 * @param line complete record (already joined if it spanned several lines)
    	 * @return the fields of the cleaned record, or {@code null} if the text
    	 *         boundaries cannot be found or fewer than nine fields result
    	 */
    	private static String[] splitRecord(String line) {
    		int textStart=line.indexOf(";")+1;// the text follows the first separator
    		int timeStart=line.indexOf(";2016");// separator in front of the time field
    		if(textStart==0 || timeStart<textStart) {
    			return null;
    		}
    		// head = "<text>;<retweet flag>;<original author>"
    		String head=line.substring(textStart,timeStart);
    		int textEnd=head.indexOf(";False;");
    		if(textEnd<0) {
    			textEnd=head.indexOf(";True;");
    		}
    		if(textEnd<0) {
    			return null;
    		}
    		String text=head.substring(0,textEnd);

    		String cleaned=line;
    		if(text.contains(";")) {
    			// replace the separators inside the text only, keep the rest of the record
    			cleaned=line.substring(0,textStart)
    					+text.replace(';', ',')
    					+line.substring(textStart+textEnd);
    		}
    		if(text.contains("'")){
    			cleaned=cleaned.replaceAll("'","''");
    		}

    		String[] values=cleaned.split(";");
    		if(values.length<9) {// the fields up to index 8 are read by readCSV
    			return null;
    		}
    		return values;
    	}
    //################################################################################################################################################################
    	//-----MAIN-----
    //################################################################################################################################################################	
    		/**
    		 * Reads the tweet file, prints the number of distinct users, then
    		 * creates the {@code tweet} table in the PostgreSQL database and inserts
    		 * every tweet.
    		 * <p>
    		 * Only the tweet table is created and filled here; users and hashtags
    		 * are not exported. If the connection cannot be established, the
    		 * program prints the error and terminates with exit status 1. Any later
    		 * SQL error (for example because the table already exists) is printed
    		 * and ends the export.
    		 * <p>
    		 * NOTE(review): file location, connection settings and the password are
    		 * hard-coded; consider moving them to arguments or the environment.
    		 *
    		 * @param args not used
    		 * @throws IOException if the input file cannot be opened
    		 */
    		public static void main(String[] args) throws IOException {

    			String xfileLocation2="/home/ubuntumac/Dropbox/TI2/DBS/M.csv";

    			Clean database=new Clean();
    			database.readCSV(xfileLocation2);

    			ArrayList<Tweet> tweetList=database.tweetList;
    			ArrayList<User> userList=database.tweetUserList;

    			System.out.println(userList.size());

    			String dbServer="localhost";
    			String dbPort="5432";
    			String dbName="Election";
    			String dbUser="postgres";
    			String password="mama";

    			// Connection to postgres
    			Connection conn=null;
    			try{
    				conn= DriverManager.getConnection("jdbc:postgresql://" + dbServer + ":" + dbPort + "/" + dbName, dbUser , password);
    				System.out.println("Connection to database " + dbName + "@" + dbServer + ":" + dbPort + " successfully established.");
    			}catch(SQLException sqle){
    				System.out.println("The connection could not be established.");
    				sqle.printStackTrace();
    				System.exit(1);// non-zero status: the run failed
    			}

    			String creat_tweet="CREATE TABLE tweet (id serial NOT NULL," +
    										"retweet_count int  NOT NULL, " +
    										"favorite_count int NOT NULL, " +
    										"tweet_text text  NOT NULL," +
    										"tweet_time timestamp," +
    										"original_author text  NOT NULL," +
    										"PRIMARY KEY (id),"+
    										"CONSTRAINT vorgaenger_ID FOREIGN KEY(id) REFERENCES tweet(id))";

    			// The time is bound as a string parameter and converted by the
    			// database, so no value is concatenated into SQL text and no
    			// separate UPDATE per row is needed.
    			String Tweet_SQLNew="INSERT INTO tweet(id ,retweet_count, favorite_count, tweet_text, tweet_time, original_author )" +
    					"VALUES(?,?,?,?,CAST(? AS timestamp),?)";

    			// try-with-resources closes statements and connection on every path
    			try (Connection db=conn) {
    				// ---- create the table ----
    				try (Statement createStmt=db.createStatement()) {
    					createStmt.executeUpdate(creat_tweet);
    				}

    				// ---- transfer the tweets ----
    				try (PreparedStatement insertTweet=db.prepareStatement(Tweet_SQLNew)) {
    					for(Tweet tmpTweet : tweetList){
    						insertTweet.setInt(1, tmpTweet.get_tweet_ID());
    						insertTweet.setInt(2, tmpTweet.get_retweet_count());
    						insertTweet.setInt(3, tmpTweet.get_favorite_count());
    						insertTweet.setString(4, tmpTweet.get_tweet_text());
    						insertTweet.setString(5, tmpTweet.get_tweet_time());
    						insertTweet.setString(6, tmpTweet.get_original_author());
    						insertTweet.executeUpdate();
    					}
    				}
    			} catch (SQLException e) {
    				e.printStackTrace();
    			}// END of database
    		}
    }