diff --git a/Assignment02_nb.ipynb b/Assignment02_nb.ipynb deleted file mode 100644 index ee125b320ecd7ff64b22342bb5cf997c6f2c89b2..0000000000000000000000000000000000000000 --- a/Assignment02_nb.ipynb +++ /dev/null @@ -1,288 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Assignment Sheet 2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook is part of the 2nd assignment. We will train a Decision Tree classifier using the sklearn library on the Iris dataset.\n", - "\n", - "Your task is to complete the missing code, where marked with a **TODO**. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import seaborn as sns\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import graphviz\n", - "\n", - "from sklearn import datasets, tree\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.metrics import classification_report\n", - "from os import system\n", - "from IPython.display import Image\n", - "\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# For further info https://archive.ics.uci.edu/ml/datasets/iris\n", - "iris = datasets.load_iris()\n", - "X = iris.data \n", - "y = iris.target" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Exploratory data analysis" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Quick look into the data structure" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(X.shape)\n", - "print(y.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(X[:5,:])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Using pandas\n", - "data = pd.concat([pd.DataFrame(X),pd.DataFrame(y)], axis=1)\n", - "data.columns=['a','b','c','d','target']\n", - "data.head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Exemplary plots" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(8,6))\n", - "sns.scatterplot(x=X[:,0], y=X[:,1], hue=y)\n", - "plt.xlabel('Sepal length')\n", - "plt.ylabel('Sepal width')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Univariate hist_plot 'sepal_length'\n", - "class0_index = [i for i, j in enumerate(y) if j==0]\n", - "class1_index = [i for i, j in enumerate(y) if j==1]\n", - "class2_index = [i for i, j in enumerate(y) if j==2]\n", - "\n", - "sns.histplot(data=X, x=X[:,0], hue=y, element='step')\n", - "plt.xlabel('Sepal length')\n", - "plt.legend(('class1', 'class2','class3'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO: Barplot over 'sepal-width'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO: Boxplot of all features" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Classification using decision trees " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Data preparation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Split data\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n", - "X_train.shape" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Train DT classifier using sklearn; Visualization; Evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO: Train a DT classifier " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Visualize: Export to .png image file\n", - "tree.export_graphviz(clf, out_file='tree.dot') \n", - "system(\"dot -Tpng tree.dot -o tree1.png\")\n", - "Image(\"tree1.png\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO: Evaluation the classifier's performance" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Train a second DT classifier using the Entropy instead of the Gini-Index (default)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO: Train the second classifier" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Visualize #2\n", - "tree.export_graphviz(clf2, out_file='tree2.dot') \n", - "system(\"dot -Tpng tree2.dot -o tree2.png\")\n", - "Image(\"tree2.png\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO: Evaluation" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}