Skip to content
Snippets Groups Projects
Commit 26cf8353 authored by vivekanas92's avatar vivekanas92
Browse files

Upload New File

parent 249e4895
No related branches found
No related tags found
No related merge requests found
# -*- coding: utf-8 -*-
"""Project 1.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1KSx0ddnNUMpr-SioA0Jl4Df92rvf6O39
"""
# Commented out IPython magic to ensure Python compatibility.
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
# %matplotlib inline
import warnings
warnings.filterwarnings("ignore")
# Column names for the data file, in file order; the last column is the label.
column_names = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target',
]

# Read every line as data. With the default header handling the first record
# would be taken as the column names and then lost when the names are set.
# NOTE(review): assumes the data file has no header row -- confirm.
df = pd.read_csv("processed.cleveland.data", header=None, names=column_names)
df.info()
# Bare expressions only display inside a notebook, so print explicitly.
print(df.head())

# 'ca' and 'thal' are excluded from the rest of the analysis.
# Keyword form: the positional ``axis`` argument was removed in pandas 2.0.
df = df.drop(columns=['ca', 'thal'])
print(df.head())

# Absolute correlation of every remaining column with the label, ascending.
corr = df.corr()['target'].abs().sort_values()
print(corr)
# Helper function for plotting the target classes side by side
def sideplot(df, col, kind="bar", title=None):
    """Plot ``col`` separately for target classes 0-3 in a 2x2 grid.

    Each panel shows the rows of ``df`` selected by ``df.target == k`` for
    ``k`` in 0, 1, 2, 3 (left to right, top to bottom). After plotting, all
    four panels are given the same x and y limits so that they can be
    compared directly.

    Args:
        df: DataFrame containing a ``target`` column and the column ``col``.
        col: Name of the column to plot.
        kind: ``"bar"`` draws the number of rows per distinct value of
            ``col``; ``"hist"`` draws a histogram of ``col``.
        title: Optional title for the whole figure.

    Raises:
        ValueError: If ``kind`` is neither ``"bar"`` nor ``"hist"``.
    """
    # Validate with an exception rather than ``assert``, which is removed
    # when Python runs with -O.
    if kind not in ("bar", "hist"):
        raise ValueError(f"kind must be 'bar' or 'hist', got {kind!r}")

    target_levels = (0, 1, 2, 3)
    # One histogram colour per panel position.
    hist_colors = ("#268bd2", "#d2264d", "#26d229", "#d2c526")

    fig = plt.figure(figsize=(10, 6))
    axes = []
    for position, (level, hist_color) in enumerate(
            zip(target_levels, hist_colors), start=1):
        ax = plt.subplot(2, 2, position)
        subset = df[df.target == level]
        if kind == "bar":
            counts = subset[['target', col]].groupby(col).count()
            counts.plot(kind='bar', rot=0, legend=False, ax=ax,
                        color="#268bd2")
        else:
            # Both kinds now iterate over the same classes; previously the
            # histogram panels repeated classes 1 and 0 twice.
            ax.hist(subset[col], color=hist_color)
            ax.set_xlabel(col)
        axes.append(ax)

    # Re-adjusting: share one set of limits across all four panels.
    ylim = (0, max(ax.get_ylim()[1] for ax in axes))
    xlim = (min(ax.get_xlim()[0] for ax in axes),
            max(ax.get_xlim()[1] for ax in axes))
    for ax in axes:
        ax.set_ylim(ylim)
        ax.set_xlim(xlim)

    if title is not None:
        fig.suptitle(title)
# One comparison figure per feature: (column, plot kind, figure title).
comparison_plots = [
    ("fbs", "hist", "Comparison of fasting blood sugar"),
    ("chol", "hist", "Comparison of serum cholestoral"),
    ("restecg", "bar", "Comparison of resting ECG results"),
    ("cp", "hist", "Comparison of chest pain type "),
    ("thalach", "bar", "Comparison of maximum heart rate achieved"),
    ("slope", "bar", "Comparison of the slope of the peak exercise ST segment"),
    ("age", "hist", "Comparison of age"),
    ("sex", "bar", "sex"),
]
for column, plot_kind, figure_title in comparison_plots:
    sideplot(df, column, kind=plot_kind, title=figure_title)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# Project the features onto a single linear discriminant axis.
clf = LDA(n_components=1)
y = df["target"].values
# Every column except the last one is used as a feature
# (assumes "target" is the last column of df).
features = df[df.columns[:-1]].values
X = clf.fit(features, y).transform(features)
X = X[:, 0]

# One swarm per target class along the discriminant axis. The values are
# passed as ``x=`` explicitly because the meaning of the first positional
# argument of swarmplot differs between seaborn releases.
# NOTE(review): the previous legend labels contradicted each other
# ("with HD" / "without HD" mixed across classes), so each swarm is now
# labelled with its class value; restore descriptive labels once the meaning
# of each class has been confirmed against the dataset documentation.
swarm_colors = ((0, "b"), (1, "r"), (2, "y"), (3, "g"), (4, "c"))
for level, color in swarm_colors:
    sns.swarmplot(x=X[y == level], color=color, label=f"target = {level}")
plt.title("LDA analysis of heart disease classification")
plt.legend()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment