Select Git revision
Clean_commented.java
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
Clean_commented.java 19.64 KiB
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.sql.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Scanner;
import java.util.regex.Matcher;
import javax.naming.spi.DirStateFactory.Result;
/**
 * Tweet entity: author, text, time string, numeric ID and the retweet/favorite counters.
 * All values are supplied once through the constructor and are read-only afterwards.
 */
class Tweet {
    private final String original_author;
    private final String tweet_text;
    private final String tweet_time;
    private final int tweet_ID;
    private final int retweet_count;
    private final int favorite_count;

    /**
     * Stores all six values unchanged; no validation is performed.
     */
    public Tweet(String original_author,
                 String tweet_text,
                 String tweet_time,
                 int tweet_ID,
                 int retweet_count,
                 int favorite_count) {
        this.original_author = original_author;
        this.tweet_text = tweet_text;
        this.tweet_time = tweet_time;
        this.tweet_ID = tweet_ID;
        this.retweet_count = retweet_count;
        this.favorite_count = favorite_count;
    }

    /** @return the author string passed to the constructor */
    public String get_original_author() {
        return original_author;
    }

    /** @return the tweet text passed to the constructor */
    public String get_tweet_text() {
        return tweet_text;
    }

    /** @return the time string passed to the constructor (kept as text, not parsed) */
    public String get_tweet_time() {
        return tweet_time;
    }

    /** @return the numeric tweet ID */
    public int get_tweet_ID() {
        return tweet_ID;
    }

    /** @return the retweet counter */
    public int get_retweet_count() {
        return retweet_count;
    }

    /** @return the favorite counter */
    public int get_favorite_count() {
        return favorite_count;
    }
}
/**
 * Hashtag entity of the ER model: a numeric ID paired with the hashtag text.
 */
class Hashtag {
    private final int hashtag_ID;
    private final String hashtag_text;

    /** Stores both values unchanged. */
    public Hashtag(int hashtag_ID, String hashtag_text) {
        this.hashtag_text = hashtag_text;
        this.hashtag_ID = hashtag_ID;
    }

    /** @return the numeric hashtag ID */
    public int get_hashtag_ID() {
        return hashtag_ID;
    }

    /** @return the hashtag text as passed to the constructor */
    public String get_hashtag_text() {
        return hashtag_text;
    }
}
/**
 * User entity of the ER model: a numeric ID paired with the account handle.
 */
class User {
    private final int user_id;
    private final String handle;

    /** Stores both values unchanged. */
    public User(int user_id, String handle) {
        this.handle = handle;
        this.user_id = user_id;
    }

    /** @return the handle as passed to the constructor */
    public String get_handle() {
        return handle;
    }

    /** @return the numeric user ID */
    public int get_user_id() {
        return user_id;
    }
}
/**
 * User-to-tweet relation of the ER model: one (user ID, tweet ID) pair.
 */
class user2tweet {
    private final int user_id;
    private final int tweet_id;

    /** Stores both IDs unchanged. */
    public user2tweet(int user_id, int tweet_id) {
        this.tweet_id = tweet_id;
        this.user_id = user_id;
    }

    /** @return the user side of the pair */
    public int get_user_id() {
        return user_id;
    }

    /** @return the tweet side of the pair */
    public int get_tweet_id() {
        return tweet_id;
    }
}
/**
 * Tweet-to-hashtag relation: one (tweet ID, hashtag ID) pair.
 */
class tweet2hashtag {
    private final int tweet_id;
    private final int hashtag_id;

    /** Stores both IDs unchanged. */
    public tweet2hashtag(int tweet_id, int hashtag_id) {
        this.hashtag_id = hashtag_id;
        this.tweet_id = tweet_id;
    }

    /** @return the tweet side of the pair */
    public int get_tweet_id() {
        return tweet_id;
    }

    /** @return the hashtag side of the pair */
    public int get_hashtag_id() {
        return hashtag_id;
    }
}
/**
 * Tweet-to-retweet relation: pairs the ID of an original tweet with the ID of its copy.
 */
class tweet2Retweet {
    // Both fields keep their package-private visibility.
    int tweet_id_Original;
    int tweet_id_Copy;

    /** Stores both IDs unchanged. */
    public tweet2Retweet(int tweet_id_Original, int tweet_id_Copy) {
        this.tweet_id_Copy = tweet_id_Copy;
        this.tweet_id_Original = tweet_id_Original;
    }

    /** @return the ID stored as the original tweet */
    public int get_tweet_id_Original() {
        return tweet_id_Original;
    }

    /** @return the ID stored as the copy */
    public int get_tweet_id_Copy() {
        return tweet_id_Copy;
    }
}
//#############################################################################################################################################
public class Clean {
// Tweets collected by readCSV, one entry per parsed row.
public ArrayList<Tweet> tweetList=new ArrayList<Tweet>();
// Hashtag entities that readCSV builds from hashtagList_entire (ID = list index).
public ArrayList<Hashtag> HashtagList=new ArrayList<Hashtag>();
// User entities that readCSV builds from userList (ID = list index).
public ArrayList<User> tweetUserList=new ArrayList<User>();
// Raw handles (first CSV column) gathered by readCSV; de-duplicated there before users are built.
public ArrayList<String> userList=new ArrayList<String>();
// Distinct hashtag strings accumulated through concati.
public ArrayList<String> hashtagList_entire=new ArrayList<String> ();
/** @return the live (not copied) list of tweets held by this instance */
public ArrayList<Tweet> get_tweetList() {
    return tweetList;
}
/** @return the live (not copied) list of hashtag entities held by this instance */
public ArrayList<Hashtag> get_HashtagList() {
    return HashtagList;
}
/** @return the live (not copied) list of user entities held by this instance */
public ArrayList<User> get_tweetUserList() {
    return tweetUserList;
}
/**
 * Writes every stored tweet to standard output, one per line,
 * as the tweet ID followed by a space and the tweet text.
 */
public void printTweet() {
    for (Tweet current : tweetList) {
        String line = current.get_tweet_ID() + " " + current.get_tweet_text();
        System.out.println(line);
    }
}
//#####################################################################################################################################
/**
 * Prints the content of a text file to standard output, skipping its first (header) line.
 * Every remaining line is printed as a zero-based running index, a space and the line itself.
 * An empty file prints nothing. A missing file is reported via its stack trace and otherwise ignored.
 *
 * @param xfileLocation path of the file to read
 */
public static void justRead(String xfileLocation) {
    // try-with-resources closes the scanner (and the readers it wraps) even if reading fails.
    try (Scanner scanIn = new Scanner(new BufferedReader(new FileReader(xfileLocation)))) {
        // Drop the whole header line; next() would only drop its first whitespace-delimited
        // token and would throw on an empty file.
        if (scanIn.hasNextLine()) {
            scanIn.nextLine();
        }
        int counter = 0; // number of data lines printed so far
        while (scanIn.hasNextLine()) {
            System.out.println(counter + " " + scanIn.nextLine());
            counter++;
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
}// end of justRead
//############################################################################################################################################################
/**
 * Returns a new list holding each distinct string of {@code hashList} once,
 * in the order of its first occurrence. The input list is not modified.
 *
 * @param hashList strings to de-duplicate (must not be null)
 * @return a fresh list without duplicates
 */
public static ArrayList<String> removeDup(ArrayList<String> hashList) {
    // A LinkedHashSet keeps first-occurrence order while making membership checks
    // constant-time, instead of re-scanning the result list for every element.
    return new ArrayList<String>(new LinkedHashSet<String>(hashList));
}
/**
 * Merges {@code hashList} into {@code hashtagList_entire}: every entry that is not
 * already present there is appended, existing entries are left alone.
 *
 * @param hashList hashtags to merge in
 */
public void concati(ArrayList<String> hashList) {
    for (String candidate : hashList) {
        boolean alreadyKnown = this.hashtagList_entire.contains(candidate);
        if (!alreadyKnown) {
            this.hashtagList_entire.add(candidate);
        }
    }
}
/**
 * Returns the numeric code of a character.
 *
 * @param character the character to convert
 * @return the char value widened to int
 */
public static int charToASCII(final char character) {
    int code = character; // implicit widening char -> int
    return code;
}
/**
 * Tells whether a character is an ASCII letter (A-Z, a-z) or an ASCII digit (0-9).
 *
 * @param chr character to classify
 * @return true for ASCII letters and digits, false for everything else
 */
public static boolean checkASCII(char chr) {
    boolean upperCase = chr >= 'A' && chr <= 'Z';
    boolean lowerCase = chr >= 'a' && chr <= 'z';
    boolean digit = chr >= '0' && chr <= '9';
    return upperCase || lowerCase || digit;
}
/**
 * Extracts all hashtags from a string. A hashtag is a '#' immediately followed by one or
 * more characters accepted by {@link #checkASCII(char)}; the returned strings include the '#'.
 * Hashtags are returned in order of appearance and duplicates are kept.
 *
 * A '#' that ends the string or is followed by a non-accepted character is ignored.
 * Hashtags separated by a single character ("#a #b") or by nothing ("#a#b") are all found.
 *
 * @param input text to scan (must not be null)
 * @return list of hashtags found, possibly empty
 */
public static ArrayList<String> identifyHash(String input) {
    ArrayList<String> hashList = new ArrayList<String>();
    final int length = input.length();
    int i = 0;
    while (i < length) {
        if (input.charAt(i) != '#') {
            i++;
            continue;
        }
        // Advance 'end' over the run of valid hashtag characters after the '#'.
        // The bounds check comes first so a trailing '#' never reads past the string.
        int end = i + 1;
        while (end < length && checkASCII(input.charAt(end))) {
            end++;
        }
        if (end > i + 1) { // at least one valid character followed the '#'
            hashList.add(input.substring(i, end));
        }
        // Resume exactly at the terminating character: it may itself be the '#'
        // of the next hashtag, so it must not be skipped.
        i = end;
    }
    return hashList;
}// end of identifyHash
/**
 * Prints each entry of the list on its own line to standard output.
 *
 * @param hashList strings to print
 */
public static void displayHashes(ArrayList<String> hashList) {
    for (String entry : hashList) {
        System.out.println(entry);
    }
}// end of displayHashes
/**
 * Sorts the given list in place into DESCENDING natural (String.compareTo) order and
 * returns that same list instance.
 *
 * NOTE(review): the method name suggests ascending alphabetical order, but the
 * comparison used here has always produced descending order; that result is kept.
 *
 * @param hashList list to sort; it is modified
 * @return {@code hashList} itself, sorted from largest to smallest
 */
public ArrayList<String> orderAlpha(ArrayList<String> hashList) {
    // Library sort instead of the former hand-written quadratic swap loop.
    Collections.sort(hashList, Collections.reverseOrder());
    return hashList;
}
//############################################################################################################################################################
/**
 * Returns a copy of {@code str} in which every semicolon is replaced by a comma.
 * All other characters, including those right before a semicolon, are kept, and a
 * semicolon at index 0 is handled like any other.
 *
 * @param str text to convert (must not be null)
 * @return the text with ';' replaced by ','
 */
public static String replaceAllSemi(String str) {
    return str.replace(';', ',');
}// end of replaceAllSemi
//################################################################################################
//
// public static String identifyOriginalAuthor(String tuple){
//
// }// end of identifyOriginalAuthor
//
//################################################################################################
/**
 * Reads a semicolon-separated tweet file and fills the lists of this instance.
 *
 * The first non-blank line is treated as the header and skipped. Blank lines are ignored.
 * A physical line that does not end in ";False" or ";True" is taken to be the first part
 * of a row that continues on the following line(s); the parts are joined with one space.
 * For each complete row a {@link Tweet} is added to {@code tweetList} (IDs count up from 1),
 * the handle in column 0 is added to {@code userList}, and the hashtags of the text are
 * merged into {@code hashtagList_entire}. Columns used: 0 handle, 1 text, 2 retweet flag,
 * 3 original author, 4 time, 7 retweet count, 8 favorite count.
 *
 * After the whole file has been read, {@code HashtagList} and {@code tweetUserList} are
 * rebuilt from the de-duplicated hashtag and user lists.
 *
 * @param xfileLocation path of the CSV file
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IllegalArgumentException if a complete row lacks the expected field boundaries
 *         or has fewer than 9 columns
 * @throws NumberFormatException if a counter column is not an integer
 */
public void readCSV(String xfileLocation) throws FileNotFoundException {
    String pendingRow = null; // beginning of a row that continues on the next physical line
    int counter = 0;          // number of complete rows so far; doubles as tweet ID

    // try-with-resources closes the scanner once, after the loop, even when parsing fails.
    try (Scanner scanIn = new Scanner(new BufferedReader(new FileReader(xfileLocation)))) {
        // Consume lines up to and including the first non-blank one: that is the header.
        while (scanIn.hasNextLine() && scanIn.nextLine().trim().isEmpty()) {
            // leading blank line, keep looking for the header
        }

        while (scanIn.hasNextLine()) {
            String inputLine = scanIn.nextLine();
            String trimmed = inputLine.trim();
            if (trimmed.isEmpty() || trimmed.equals("\\n")) {
                continue; // nothing to clean on blank lines
            }
            if (pendingRow != null) {
                inputLine = pendingRow + " " + inputLine;
                pendingRow = null;
            }
            if (!inputLine.endsWith(";False") && !inputLine.endsWith(";True")) {
                pendingRow = inputLine; // row is not finished yet, wait for the rest
                continue;
            }

            counter++;
            String[] values = escapeTweetText(inputLine).split(";");
            if (values.length < 9) {
                throw new IllegalArgumentException("Row " + counter + " has " + values.length
                        + " columns, expected at least 9: " + inputLine);
            }

            // For a plain tweet the author is the handle; for a retweet it is column 3.
            String originalAuthor = values[2].equals("False") ? values[0] : values[3];
            String tweet_text = values[1];
            Tweet tweetTuple = new Tweet(originalAuthor, tweet_text, values[4], counter,
                    Integer.parseInt(values[7]), Integer.parseInt(values[8]));
            this.tweetList.add(tweetTuple);
            this.userList.add(values[0]);

            // Hashtags of this tweet, each once, merged into the global list.
            concati(removeDup(identifyHash(tweet_text)));
        }
    }

    rebuildEntityLists();
}//end of readCSV

/**
 * Makes the tweet-text column of one complete row safe for splitting on ';'.
 * The text is the span between the first ';' and the first ";False;" (or, failing that,
 * ";True;") found before the first ";2016". Semicolons inside it become commas and
 * apostrophes are doubled; the rest of the row is returned unchanged around it.
 */
private static String escapeTweetText(String row) {
    int textStart = row.indexOf(';') + 1;
    int timeStart = row.indexOf(";2016"); // the time column is located by its leading year
    if (timeStart < textStart) {
        throw new IllegalArgumentException("Cannot locate text/time boundaries in row: " + row);
    }
    String textAndFlags = row.substring(textStart, timeStart);
    int flagStart = textAndFlags.indexOf(";False;");
    if (flagStart < 0) {
        flagStart = textAndFlags.indexOf(";True;");
    }
    if (flagStart < 0) {
        throw new IllegalArgumentException("Cannot locate retweet flag in row: " + row);
    }
    String text = textAndFlags.substring(0, flagStart);
    // NOTE(review): the apostrophe doubling is legacy escaping for string-built SQL; text
    // inserted through a PreparedStatement will keep the doubled quotes - confirm it is still wanted.
    String cleaned = text.replace(';', ',').replace("'", "''");
    // Splice the cleaned text back between the handle and the untouched remainder of the row.
    return row.substring(0, textStart) + cleaned + row.substring(textStart + flagStart);
}

/**
 * Rebuilds HashtagList and tweetUserList from the collected strings.
 * Hashtags are de-duplicated and sorted, users are de-duplicated in first-seen order;
 * each entity's ID is its index in the resulting list.
 */
private void rebuildEntityLists() {
    this.hashtagList_entire = removeDup(this.hashtagList_entire);
    Collections.sort(this.hashtagList_entire);
    HashtagList.clear(); // derived data: start over so a repeated call cannot duplicate entries
    for (int i = 0; i < this.hashtagList_entire.size(); i++) {
        HashtagList.add(new Hashtag(i, this.hashtagList_entire.get(i)));
    }

    this.userList = removeDup(this.userList);
    tweetUserList.clear();
    for (int i = 0; i < this.userList.size(); i++) {
        tweetUserList.add(new User(i, this.userList.get(i)));
    }
}
//################################################################################################################################################################
//-----MAIN-----
//################################################################################################################################################################
/**
 * Reads the tweet CSV, prints the number of distinct users, then creates the
 * {@code tweet} table in the PostgreSQL database and inserts every parsed tweet.
 *
 * Only the tweet table is created and filled here; nothing is written for users or hashtags.
 *
 * @param args optional: {@code args[0]} overrides the default CSV path
 * @throws IOException if the CSV file cannot be opened
 */
public static void main(String[] args) throws IOException {
    String xfileLocation2 = args.length > 0 ? args[0] : "/home/ubuntumac/Dropbox/TI2/DBS/M.csv";

    Clean database = new Clean();
    database.readCSV(xfileLocation2);
    ArrayList<Tweet> tweetList = database.tweetList;
    ArrayList<User> userList = database.tweetUserList;
    System.out.println(userList.size());

    // NOTE(review): connection settings and credentials are hard-coded; move them to
    // configuration or the environment before this is used outside a local setup.
    String dbServer = "localhost";
    String dbPort = "5432";
    String dbName = "Election";
    String dbUser = "postgres";
    String password = "mama";

    Connection conn = null;
    try {
        conn = DriverManager.getConnection("jdbc:postgresql://" + dbServer + ":" + dbPort + "/" + dbName, dbUser, password);
        System.out.println("Connection to database " + dbName + "@" + dbServer + ":" + dbPort + " successfully established.");
    } catch (SQLException sqle) {
        System.out.println("The connection could not be established.");
        sqle.printStackTrace();
        System.exit(1); // non-zero: the run failed
    }

    String creat_tweet = "CREATE TABLE tweet (id serial NOT NULL," +
            "retweet_count int NOT NULL, " +
            "favorite_count int NOT NULL, " +
            "tweet_text text NOT NULL," +
            "tweet_time timestamp," +
            "original_author text NOT NULL," +
            "PRIMARY KEY (id),"+
            "CONSTRAINT vorgaenger_ID FOREIGN KEY(id) REFERENCES tweet(id))";
    // The time is bound as text and cast by the server, so the CSV value never becomes
    // part of the SQL string and no separate UPDATE per row is needed.
    String insertTweet = "INSERT INTO tweet(id, retweet_count, favorite_count, tweet_text, tweet_time, original_author) " +
            "VALUES(?,?,?,?,CAST(? AS timestamp),?)";

    // try-with-resources releases the statements and the connection on every exit path.
    try (Connection connection = conn;
         Statement ddl = connection.createStatement()) {
        ddl.executeUpdate(creat_tweet);

        try (PreparedStatement insert = connection.prepareStatement(insertTweet)) {
            for (Tweet tmpTweet : tweetList) {
                insert.setInt(1, tmpTweet.get_tweet_ID());
                insert.setInt(2, tmpTweet.get_retweet_count());
                insert.setInt(3, tmpTweet.get_favorite_count());
                insert.setString(4, tmpTweet.get_tweet_text());
                insert.setString(5, tmpTweet.get_tweet_time());
                insert.setString(6, tmpTweet.get_original_author());
                insert.executeUpdate();
            }
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }// END of database
}
}