Upload New File

26cf8353 · vivekanas92 · 249e4895 · 26cf8353
Commit 26cf8353 authored 5 years ago by vivekanas92
--- a/project_1.py
+++ b/project_1.py
+# -*- coding: utf-8 -*-
+"""Project 1.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1KSx0ddnNUMpr-SioA0Jl4Df92rvf6O39
+"""
+
+# Commented out IPython magic to ensure Python compatibility.
+import numpy as np # linear algebra
+import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
+import seaborn as sns
+import matplotlib.pyplot as plt
+# %matplotlib inline
+
+import warnings
+warnings.filterwarnings("ignore")
+
+df = pd.read_csv("processed.cleveland.data")
+df.info()
+
+df.head()
+
+df.columns = ['age', 'sex', 'cp','trestbps','chol','fbs','restecg','thalach',' exang','oldpeak','slope','ca','thal','target']
+
+df.head()
+
+
+
+df = df.drop('ca', 1)
+df = df.drop('thal', 1)
+
+df.head()
+
+corr = df.corr()['target'].abs().sort_values()
+corr
+
+# Helper function for plotting side by side
+def sideplot(df, col, kind="bar", title=None):
+    assert kind in ["bar", "hist"]
+    fig = plt.figure(figsize=(10, 6))
+    if kind == "bar":
+        ax1 = plt.subplot(2, 2, 1)
+        df[df.target == 0][['target', col]].groupby(col).count().plot(kind='bar', rot=0, legend=False, ax=ax1, color="#268bd2")
+        ax2 = plt.subplot(2, 2, 2)
+        df[df.target == 1][['target', col]].groupby(col).count().plot(kind='bar', rot=0, legend=False, ax=ax2, color="#268bd2")
+        
+        ax3 = plt.subplot(2, 2, 3)
+        df[df.target == 2][['target', col]].groupby(col).count().plot(kind='bar', rot=0, legend=False, ax=ax3, color="#268bd2")
+        ax4 = plt.subplot(2, 2, 4)
+        df[df.target == 3][['target', col]].groupby(col).count().plot(kind='bar', rot=0, legend=False, ax=ax4, color="#268bd2")
+    else:
+        ax1 = plt.subplot(2, 2, 1)
+        plt.hist(df[df.target == 1][col], color="#268bd2")
+        plt.xlabel(col)
+        
+        ax2 = plt.subplot(2, 2, 2)
+        plt.hist(df[df.target == 0][col], color="#d2264d")
+        plt.xlabel(col)
+        
+        ax3 = plt.subplot(2, 2, 3)
+        plt.hist(df[df.target == 1][col], color="#26d229")
+        plt.xlabel(col)
+        
+        ax4 = plt.subplot(2, 2, 4)
+        plt.hist(df[df.target == 0][col], color="#d2c526")
+        plt.xlabel(col)
+        
+
+    # Re-adjusting
+    ylim = (0, max(ax1.get_ylim()[1], ax2.get_ylim()[1]))
+    ax1.set_ylim(ylim)
+    ax2.set_ylim(ylim)
+    xlim = (min(ax1.get_xlim()[0], ax2.get_xlim()[0]), max(ax1.get_xlim()[1], ax2.get_xlim()[1]))
+    ax1.set_xlim(xlim)
+    ax2.set_xlim(xlim)
+    if title is not None:
+        fig.suptitle(title)
+    #plt.subplots_adjust(top=0.99)
+
+sideplot(df, "fbs", kind="hist", title="Comparison of fasting blood sugar")
+
+sideplot(df, "chol", kind="hist", title="Comparison of serum cholestoral")
+
+sideplot(df, "restecg", kind="bar", title="Comparison of resting ECG results")
+
+sideplot(df, "cp", kind="hist", title="Comparison of chest pain type ")
+
+sideplot(df, "thalach", kind="bar", title="Comparison of  maximum heart rate achieved")
+
+
+
+sideplot(df, "slope", kind="bar", title="Comparison of the slope of the peak exercise ST segment")
+
+sideplot(df, "age", kind="hist", title="Comparison of age")
+
+sideplot(df, "sex", kind="bar", title="sex")
+
+
+
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
+clf = LDA(n_components=1)
+
+y = df["target"].values
+X = clf.fit(df[df.columns[:-1]].values, y).transform(df[df.columns[:-1]].values)
+X = X[:, 0]
+
+sns.swarmplot(X[y == 0], color="b", label="with HD 100")
+sns.swarmplot(X[y == 1], color="r", label="with HD 75")
+sns.swarmplot(X[y == 2], color="y", label="without HD 50")
+sns.swarmplot(X[y == 3], color="g", label="with HD 25")
+sns.swarmplot(X[y == 4], color="c", label="without HD 00")
+
+plt.title("LDA analysis of heart disease classification")
+plt.legend()
\ No newline at end of file