From 32780a1ae86b9d70665b324e7f9ae0f36a6889e0 Mon Sep 17 00:00:00 2001 From: aakan96 <aakan96@mi.fu-berlin.de> Date: Tue, 11 Jul 2023 18:59:06 +0000 Subject: [PATCH] Neue Datei hochladen --- .../DS_miRNA_limma_dataset_xgb_final.ipynb | 1544 +++++++++++++++++ 1 file changed, 1544 insertions(+) create mode 100644 Machine Learning/DS_miRNA_limma_dataset_xgb_final.ipynb diff --git a/Machine Learning/DS_miRNA_limma_dataset_xgb_final.ipynb b/Machine Learning/DS_miRNA_limma_dataset_xgb_final.ipynb new file mode 100644 index 0000000..549f586 --- /dev/null +++ b/Machine Learning/DS_miRNA_limma_dataset_xgb_final.ipynb @@ -0,0 +1,1544 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 257, + "id": "f097ad55", + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "#from sklearn.model_selection import cross_val_score\n", + "#from sklearn.metrics import accuracy_score\n", + "#import sklearn.metrics as metrics\n", + "#from sklearn.metrics import auc\n", + "from sklearn.metrics import RocCurveDisplay\n", + "#from sklearn.model_selection import KFold\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from imblearn.over_sampling import SMOTE\n", + "from sklearn.linear_model import Lasso\n", + "import xgboost as xgb\n", + "from sklearn.model_selection import GridSearchCV\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "#np.random.seed(7)" + ] + }, + { + "cell_type": "markdown", + "id": "73b6611a", + "metadata": {}, + "source": [ + "# Data Preprocessing" + ] + }, + { + "cell_type": "code", + "execution_count": 258, + "id": "0eeb7a35", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"DS/miRNA_DS_preprocessed_data.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 259, + "id": "0dd80c33", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + 
"data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Row.names</th>\n", + " <th>GSM1069774</th>\n", + " <th>GSM1069775</th>\n", + " <th>GSM1069776</th>\n", + " <th>GSM1069777</th>\n", + " <th>GSM1069778</th>\n", + " <th>GSM1069779</th>\n", + " <th>GSM1069780</th>\n", + " <th>GSM1069781</th>\n", + " <th>GSM1069782</th>\n", + " <th>...</th>\n", + " <th>GSM1070002</th>\n", + " <th>GSM1070003</th>\n", + " <th>GSM1070004</th>\n", + " <th>GSM1070005</th>\n", + " <th>GSM1070006</th>\n", + " <th>GSM1070007</th>\n", + " <th>GSM1070008</th>\n", + " <th>GSM1070009</th>\n", + " <th>GSM1070010</th>\n", + " <th>GSM1070011</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>dmr_3</td>\n", + " <td>0.732675</td>\n", + " <td>0.249772</td>\n", + " <td>0.400779</td>\n", + " <td>0.380263</td>\n", + " <td>0.422207</td>\n", + " <td>0.195084</td>\n", + " <td>0.539051</td>\n", + " <td>0.949943</td>\n", + " <td>0.860827</td>\n", + " <td>...</td>\n", + " <td>2.718250</td>\n", + " <td>2.571045</td>\n", + " <td>0.421015</td>\n", + " <td>0.843205</td>\n", + " <td>0.991408</td>\n", + " <td>0.987970</td>\n", + " <td>-0.194781</td>\n", + " <td>0.212180</td>\n", + " <td>0.330997</td>\n", + " <td>0.474815</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>dmr_31a</td>\n", + " <td>-0.242559</td>\n", + " <td>-0.655514</td>\n", + " <td>-0.597444</td>\n", + " <td>-0.900491</td>\n", + " <td>-0.414831</td>\n", + " <td>-0.805359</td>\n", + " <td>-0.517633</td>\n", + " <td>-0.223316</td>\n", + " <td>-0.136347</td>\n", + " <td>...</td>\n", + " 
<td>-0.773905</td>\n", + " <td>-0.997509</td>\n", + " <td>-0.345417</td>\n", + " <td>0.082461</td>\n", + " <td>-0.000979</td>\n", + " <td>-0.118186</td>\n", + " <td>-0.710519</td>\n", + " <td>-0.284657</td>\n", + " <td>-0.194460</td>\n", + " <td>0.043697</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>dmr_6</td>\n", + " <td>0.577801</td>\n", + " <td>0.104933</td>\n", + " <td>0.232702</td>\n", + " <td>0.243207</td>\n", + " <td>-0.000781</td>\n", + " <td>-0.242663</td>\n", + " <td>0.123341</td>\n", + " <td>0.529106</td>\n", + " <td>0.879854</td>\n", + " <td>...</td>\n", + " <td>2.501101</td>\n", + " <td>2.411094</td>\n", + " <td>0.195552</td>\n", + " <td>0.633497</td>\n", + " <td>0.747960</td>\n", + " <td>0.750199</td>\n", + " <td>-0.700226</td>\n", + " <td>-0.324720</td>\n", + " <td>-0.206405</td>\n", + " <td>-0.102511</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>ebv-miR-BART13</td>\n", + " <td>-4.469532</td>\n", + " <td>-5.209572</td>\n", + " <td>-4.952808</td>\n", + " <td>-4.892073</td>\n", + " <td>-5.139127</td>\n", + " <td>-5.743958</td>\n", + " <td>-5.071777</td>\n", + " <td>-4.925431</td>\n", + " <td>-4.502912</td>\n", + " <td>...</td>\n", + " <td>-4.802387</td>\n", + " <td>-4.673440</td>\n", + " <td>-4.663603</td>\n", + " <td>-4.592706</td>\n", + " <td>-4.339199</td>\n", + " <td>-4.572984</td>\n", + " <td>-5.651293</td>\n", + " <td>-4.800142</td>\n", + " <td>-4.840442</td>\n", + " <td>-4.849285</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>hsa-let-7c</td>\n", + " <td>1.195899</td>\n", + " <td>0.498366</td>\n", + " <td>1.081166</td>\n", + " <td>-0.023958</td>\n", + " <td>1.077485</td>\n", + " <td>0.753565</td>\n", + " <td>1.024742</td>\n", + " <td>1.257121</td>\n", + " <td>0.001818</td>\n", + " <td>...</td>\n", + " <td>1.407490</td>\n", + " <td>1.420378</td>\n", + " <td>0.105924</td>\n", + " <td>1.809773</td>\n", + " <td>1.889811</td>\n", + " <td>0.696251</td>\n", + " <td>0.742722</td>\n", + " 
<td>1.006200</td>\n", + " <td>1.521159</td>\n", + " <td>1.239637</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>225</th>\n", + " <td>hur_4</td>\n", + " <td>3.822267</td>\n", + " <td>3.529789</td>\n", + " <td>3.496675</td>\n", + " <td>3.541502</td>\n", + " <td>3.305132</td>\n", + " <td>2.897285</td>\n", + " <td>3.373274</td>\n", + " <td>4.020815</td>\n", + " <td>3.502169</td>\n", + " <td>...</td>\n", + " <td>2.892524</td>\n", + " <td>2.232009</td>\n", + " <td>3.598718</td>\n", + " <td>4.631603</td>\n", + " <td>4.421315</td>\n", + " <td>3.899704</td>\n", + " <td>2.788619</td>\n", + " <td>3.125750</td>\n", + " <td>3.365475</td>\n", + " <td>3.514036</td>\n", + " </tr>\n", + " <tr>\n", + " <th>226</th>\n", + " <td>hur_5</td>\n", + " <td>-2.268209</td>\n", + " <td>-2.656642</td>\n", + " <td>-2.676555</td>\n", + " <td>-3.073553</td>\n", + " <td>-2.964948</td>\n", + " <td>-3.123546</td>\n", + " <td>-2.921738</td>\n", + " <td>-2.338717</td>\n", + " <td>-1.998436</td>\n", + " <td>...</td>\n", + " <td>-2.590625</td>\n", + " <td>-2.870981</td>\n", + " <td>-2.678576</td>\n", + " <td>-1.734258</td>\n", + " <td>-2.292801</td>\n", + " <td>-2.954284</td>\n", + " <td>-3.103706</td>\n", + " <td>-2.917537</td>\n", + " <td>-2.736411</td>\n", + " <td>-2.931018</td>\n", + " </tr>\n", + " <tr>\n", + " <th>227</th>\n", + " <td>hur_6</td>\n", + " <td>5.114399</td>\n", + " <td>4.327117</td>\n", + " <td>4.616284</td>\n", + " <td>4.581648</td>\n", + " <td>4.487481</td>\n", + " <td>4.176481</td>\n", + " 
<td>4.655614</td>\n", + " <td>5.072622</td>\n", + " <td>5.218395</td>\n", + " <td>...</td>\n", + " <td>5.283555</td>\n", + " <td>5.058830</td>\n", + " <td>4.912666</td>\n", + " <td>6.425631</td>\n", + " <td>6.133770</td>\n", + " <td>5.505105</td>\n", + " <td>4.340513</td>\n", + " <td>4.838599</td>\n", + " <td>5.185601</td>\n", + " <td>4.798139</td>\n", + " </tr>\n", + " <tr>\n", + " <th>228</th>\n", + " <td>miRNABrightCorner30</td>\n", + " <td>2.017444</td>\n", + " <td>2.022346</td>\n", + " <td>1.498011</td>\n", + " <td>0.789822</td>\n", + " <td>1.219583</td>\n", + " <td>1.405472</td>\n", + " <td>1.224923</td>\n", + " <td>1.481480</td>\n", + " <td>2.488058</td>\n", + " <td>...</td>\n", + " <td>1.023877</td>\n", + " <td>0.361653</td>\n", + " <td>1.780362</td>\n", + " <td>1.898003</td>\n", + " <td>2.210791</td>\n", + " <td>2.457963</td>\n", + " <td>0.232713</td>\n", + " <td>0.863574</td>\n", + " <td>0.846454</td>\n", + " <td>2.089520</td>\n", + " </tr>\n", + " <tr>\n", + " <th>229</th>\n", + " <td>mr_1</td>\n", + " <td>1.640437</td>\n", + " <td>0.794260</td>\n", + " <td>1.584544</td>\n", + " <td>1.255367</td>\n", + " <td>0.951615</td>\n", + " <td>0.803260</td>\n", + " <td>1.139880</td>\n", + " <td>1.863920</td>\n", + " <td>2.059302</td>\n", + " <td>...</td>\n", + " <td>1.008895</td>\n", + " <td>0.539606</td>\n", + " <td>2.092973</td>\n", + " <td>3.013288</td>\n", + " <td>2.858038</td>\n", + " <td>2.142301</td>\n", + " <td>1.067806</td>\n", + " <td>1.203499</td>\n", + " <td>1.604729</td>\n", + " <td>1.597958</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>230 rows × 239 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Row.names GSM1069774 GSM1069775 GSM1069776 GSM1069777 \\\n", + "0 dmr_3 0.732675 0.249772 0.400779 0.380263 \n", + "1 dmr_31a -0.242559 -0.655514 -0.597444 -0.900491 \n", + "2 dmr_6 0.577801 0.104933 0.232702 0.243207 \n", + "3 ebv-miR-BART13 -4.469532 -5.209572 -4.952808 -4.892073 \n", + "4 hsa-let-7c 1.195899 0.498366 1.081166 
-0.023958 \n", + ".. ... ... ... ... ... \n", + "225 hur_4 3.822267 3.529789 3.496675 3.541502 \n", + "226 hur_5 -2.268209 -2.656642 -2.676555 -3.073553 \n", + "227 hur_6 5.114399 4.327117 4.616284 4.581648 \n", + "228 miRNABrightCorner30 2.017444 2.022346 1.498011 0.789822 \n", + "229 mr_1 1.640437 0.794260 1.584544 1.255367 \n", + "\n", + " GSM1069778 GSM1069779 GSM1069780 GSM1069781 GSM1069782 ... \\\n", + "0 0.422207 0.195084 0.539051 0.949943 0.860827 ... \n", + "1 -0.414831 -0.805359 -0.517633 -0.223316 -0.136347 ... \n", + "2 -0.000781 -0.242663 0.123341 0.529106 0.879854 ... \n", + "3 -5.139127 -5.743958 -5.071777 -4.925431 -4.502912 ... \n", + "4 1.077485 0.753565 1.024742 1.257121 0.001818 ... \n", + ".. ... ... ... ... ... ... \n", + "225 3.305132 2.897285 3.373274 4.020815 3.502169 ... \n", + "226 -2.964948 -3.123546 -2.921738 -2.338717 -1.998436 ... \n", + "227 4.487481 4.176481 4.655614 5.072622 5.218395 ... \n", + "228 1.219583 1.405472 1.224923 1.481480 2.488058 ... \n", + "229 0.951615 0.803260 1.139880 1.863920 2.059302 ... \n", + "\n", + " GSM1070002 GSM1070003 GSM1070004 GSM1070005 GSM1070006 GSM1070007 \\\n", + "0 2.718250 2.571045 0.421015 0.843205 0.991408 0.987970 \n", + "1 -0.773905 -0.997509 -0.345417 0.082461 -0.000979 -0.118186 \n", + "2 2.501101 2.411094 0.195552 0.633497 0.747960 0.750199 \n", + "3 -4.802387 -4.673440 -4.663603 -4.592706 -4.339199 -4.572984 \n", + "4 1.407490 1.420378 0.105924 1.809773 1.889811 0.696251 \n", + ".. ... ... ... ... ... ... 
\n", + "225 2.892524 2.232009 3.598718 4.631603 4.421315 3.899704 \n", + "226 -2.590625 -2.870981 -2.678576 -1.734258 -2.292801 -2.954284 \n", + "227 5.283555 5.058830 4.912666 6.425631 6.133770 5.505105 \n", + "228 1.023877 0.361653 1.780362 1.898003 2.210791 2.457963 \n", + "229 1.008895 0.539606 2.092973 3.013288 2.858038 2.142301 \n", + "\n", + " GSM1070008 GSM1070009 GSM1070010 GSM1070011 \n", + "0 -0.194781 0.212180 0.330997 0.474815 \n", + "1 -0.710519 -0.284657 -0.194460 0.043697 \n", + "2 -0.700226 -0.324720 -0.206405 -0.102511 \n", + "3 -5.651293 -4.800142 -4.840442 -4.849285 \n", + "4 0.742722 1.006200 1.521159 1.239637 \n", + ".. ... ... ... ... \n", + "225 2.788619 3.125750 3.365475 3.514036 \n", + "226 -3.103706 -2.917537 -2.736411 -2.931018 \n", + "227 4.340513 4.838599 5.185601 4.798139 \n", + "228 0.232713 0.863574 0.846454 2.089520 \n", + "229 1.067806 1.203499 1.604729 1.597958 \n", + "\n", + "[230 rows x 239 columns]" + ] + }, + "execution_count": 259, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 260, + "id": "6e7836e1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(230, 239)" + ] + }, + "execution_count": 260, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 261, + "id": "683b63ce", + "metadata": {}, + "outputs": [], + "source": [ + "df = df.T" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e78017d", + "metadata": {}, + "outputs": [], + "source": [ + "#Transform the input data\n", + "df.rename(columns=df.iloc[0], inplace = True)\n", + "df.drop(df.index[0], inplace = True)\n", + "df=df.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1647a959", + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"4c50c510", + "metadata": {}, + "outputs": [], + "source": [ + "metadata = pd.read_csv(\"DS/miRNA_DS_metadata_col_info.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55f4abc3", + "metadata": {}, + "outputs": [], + "source": [ + "metadata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6730cf89", + "metadata": {}, + "outputs": [], + "source": [ + "df= df.merge(metadata, left_on=\"index\", right_on= \"Unnamed: 0\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e32e7310", + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a8ad8ad", + "metadata": {}, + "outputs": [], + "source": [ + "df['title0'] = df['title0'].replace('(?i)mucosa|normal|healthy', 0, regex=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8cf8643", + "metadata": {}, + "outputs": [], + "source": [ + "df['title0'] = df['title0'].replace('(?i)Tumor|Cancer|carcinoma', 1, regex=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c852a3f", + "metadata": {}, + "outputs": [], + "source": [ + "df['title0'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5d203aa", + "metadata": {}, + "outputs": [], + "source": [ + "df = df[pd.to_numeric(df['title0'], errors='coerce').notnull()]#remove all non-numeric data from the column." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d8882a3", + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "523bdaa6", + "metadata": {}, + "outputs": [], + "source": [ + "df= df.drop(['index', 'Unnamed: 0'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46a6fb36", + "metadata": {}, + "outputs": [], + "source": [ + "df= df.rename(columns={\"title0\": \"index\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e26f88c5", + "metadata": {}, + "outputs": [], + "source": [ + "df['index'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fbaf2507", + "metadata": {}, + "outputs": [], + "source": [ + "df= df.apply(pd.to_numeric)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3f7adb5", + "metadata": {}, + "outputs": [], + "source": [ + "df['index'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a50f416", + "metadata": {}, + "outputs": [], + "source": [ + "X=df.drop(\"index\",axis=1)\n", + "y=df['index']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e644ab0e", + "metadata": {}, + "outputs": [], + "source": [ + "y=y.astype('int')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4ad97bb", + "metadata": {}, + "outputs": [], + "source": [ + "X" + ] + }, + { + "cell_type": "markdown", + "id": "6cee6462", + "metadata": {}, + "source": [ + "# Test train split" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1da48142", + "metadata": {}, + "outputs": [], + "source": [ + "# split data into training and testing data-sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=7)" + ] + }, + { + "cell_type": "code", + "execution_count": 263, + "id": "129430e6", + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "(index\n", + " 1 61\n", + " 0 58\n", + " Name: count, dtype: int64,\n", + " index\n", + " 0 61\n", + " 1 58\n", + " Name: count, dtype: int64)" + ] + }, + "execution_count": 263, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test.value_counts(),y_train.value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "1cfe2a06", + "metadata": {}, + "source": [ + "# Cross validation" + ] + }, + { + "cell_type": "code", + "execution_count": 264, + "id": "d3550b5e", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 5 folds for each of 36 candidates, totalling 180 fits\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 1.0s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.8s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.2s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.2s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, 
subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.917 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 3/5] 
END colsample_bytree=0.5, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.2s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.917 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.2s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.01, 
learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.2s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.917 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.2s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, 
subsample=1.0;, score=1.000 total time= 0.2s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.2s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.917 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV 4/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.1, 
max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.2s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, 
subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.917 total time= 0.2s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.917 total time= 0.2s\n", + "[CV 2/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 
total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.5, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.2s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.2s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.2s\n", + "[CV 2/5] END colsample_bytree=0.8, 
gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.2s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.917 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.2s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.2s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.001, max_depth=3, 
n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.917 total time= 0.2s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV 2/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.2s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.3s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.2s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.1, 
max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.2s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.2s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.3s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.2s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.917 total time= 0.2s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.2s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.2s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.2s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, 
score=0.958 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.2s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.917 total time= 0.2s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 
5/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.917 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.8, 
gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.2s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.958 total time= 0.1s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV 4/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=1.000 total time= 0.2s\n", + "[CV 1/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.917 total time= 0.1s\n", + "[CV 2/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 3/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.958 total time= 0.1s\n", + "[CV 4/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 0.1s\n", + "[CV 5/5] END colsample_bytree=0.8, gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time= 
0.1s\n" + ] + }, + { + "data": { + "text/html": [ + "<style>#sk-container-id-15 {color: black;background-color: white;}#sk-container-id-15 pre{padding: 0;}#sk-container-id-15 div.sk-toggleable {background-color: white;}#sk-container-id-15 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-15 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-15 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-15 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-15 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-15 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-15 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-15 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-15 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-15 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-15 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-15 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-15 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-15 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px 
solid gray;flex-grow: 1;}#sk-container-id-15 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-15 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-15 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-15 div.sk-item {position: relative;z-index: 1;}#sk-container-id-15 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-15 div.sk-item::before, #sk-container-id-15 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-15 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-15 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-15 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-15 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-15 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-15 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-15 div.sk-label-container {text-align: center;}#sk-container-id-15 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-15 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-15\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(estimator=XGBClassifier(base_score=None, booster=None,\n", + " callbacks=None, colsample_bylevel=None,\n", + " colsample_bynode=None,\n", + " colsample_bytree=None,\n", + " early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None,\n", + " feature_types=None, gamma=None,\n", + " gpu_id=None, grow_policy=None,\n", + " importance_type=None,\n", + " interaction_constraints=None,\n", + " learning_rate=None, max_b...\n", + " max_delta_step=None, max_depth=None,\n", + " max_leaves=None, min_child_weight=None,\n", + " missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None,\n", + " num_parallel_tree=None, predictor=None,\n", + " random_state=42, ...),\n", + " param_grid={'colsample_bytree': [0.5, 0.8],\n", + " 'gamma': [0.1, 0.01, 0.001],\n", + " 'learning_rate': [0.1, 0.01, 0.001], 'max_depth': [3],\n", + " 'n_estimators': [100], 'subsample': [0.8, 1.0]},\n", + " verbose=3)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-29\" type=\"checkbox\" ><label for=\"sk-estimator-id-29\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">GridSearchCV</label><div class=\"sk-toggleable__content\"><pre>GridSearchCV(estimator=XGBClassifier(base_score=None, booster=None,\n", + " callbacks=None, colsample_bylevel=None,\n", + " colsample_bynode=None,\n", + " colsample_bytree=None,\n", + " early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None,\n", + " feature_types=None, gamma=None,\n", + " gpu_id=None, grow_policy=None,\n", + " importance_type=None,\n", + " interaction_constraints=None,\n", + " learning_rate=None, max_b...\n", + " max_delta_step=None, max_depth=None,\n", + " max_leaves=None, min_child_weight=None,\n", + " missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None,\n", + " num_parallel_tree=None, predictor=None,\n", + " random_state=42, ...),\n", + " param_grid={'colsample_bytree': [0.5, 0.8],\n", + " 'gamma': [0.1, 0.01, 0.001],\n", + " 'learning_rate': [0.1, 0.01, 0.001], 'max_depth': [3],\n", + " 'n_estimators': [100], 'subsample': [0.8, 1.0]},\n", + " verbose=3)</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-30\" type=\"checkbox\" ><label for=\"sk-estimator-id-30\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">estimator: XGBClassifier</label><div class=\"sk-toggleable__content\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " 
colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", + " predictor=None, random_state=42, ...)</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-31\" type=\"checkbox\" ><label for=\"sk-estimator-id-31\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">XGBClassifier</label><div class=\"sk-toggleable__content\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", + " predictor=None, random_state=42, ...)</pre></div></div></div></div></div></div></div></div></div></div>" + ], + "text/plain": [ + "GridSearchCV(estimator=XGBClassifier(base_score=None, booster=None,\n", + " callbacks=None, colsample_bylevel=None,\n", + " colsample_bynode=None,\n", + " colsample_bytree=None,\n", + " early_stopping_rounds=None,\n", + " 
enable_categorical=False, eval_metric=None,\n", + " feature_types=None, gamma=None,\n", + " gpu_id=None, grow_policy=None,\n", + " importance_type=None,\n", + " interaction_constraints=None,\n", + " learning_rate=None, max_b...\n", + " max_delta_step=None, max_depth=None,\n", + " max_leaves=None, min_child_weight=None,\n", + " missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None,\n", + " num_parallel_tree=None, predictor=None,\n", + " random_state=42, ...),\n", + " param_grid={'colsample_bytree': [0.5, 0.8],\n", + " 'gamma': [0.1, 0.01, 0.001],\n", + " 'learning_rate': [0.1, 0.01, 0.001], 'max_depth': [3],\n", + " 'n_estimators': [100], 'subsample': [0.8, 1.0]},\n", + " verbose=3)" + ] + }, + "execution_count": 264, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = xgb.XGBClassifier(random_state=42)\n", + "\n", + "# Defining parameter range\n", + "param_grid = {\n", + " 'max_depth': [3],\n", + " 'learning_rate': [0.1 ,0.01, 0.001],\n", + " 'n_estimators': [100],\n", + " 'gamma': [ 0.1,0.01,0.001],\n", + " 'subsample': [0.8, 1.0],\n", + " 'colsample_bytree': [ 0.5, 0.8]\n", + "}\n", + "\n", + "grid = GridSearchCV(model, param_grid, refit=True, verbose=3)\n", + "\n", + "# Fitting the model for grid search\n", + "grid.fit(X_train, y_train)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 265, + "id": "556e249c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'colsample_bytree': 0.5, 'gamma': 0.1, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}\n", + "XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=0.5, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=0.1, gpu_id=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, 
learning_rate=0.1, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=3, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", + " predictor=None, random_state=42, ...)\n" + ] + } + ], + "source": [ + "# print best parameter after tuning\n", + "print(grid.best_params_)\n", + " \n", + "# print how our model looks after hyper-parameter tuning\n", + "print(grid.best_estimator_)" + ] + }, + { + "cell_type": "markdown", + "id": "3ea57532", + "metadata": {}, + "source": [ + "# classification report" + ] + }, + { + "cell_type": "code", + "execution_count": 266, + "id": "18becbe2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.97 0.98 0.97 58\n", + " 1 0.98 0.97 0.98 61\n", + "\n", + " accuracy 0.97 119\n", + " macro avg 0.97 0.97 0.97 119\n", + "weighted avg 0.97 0.97 0.97 119\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.metrics import classification_report, confusion_matrix\n", + "grid_predictions = grid.predict(X_test)\n", + "print(classification_report(y_test, grid_predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": 267, + "id": "53a7f793", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<style>#sk-container-id-16 {color: black;background-color: white;}#sk-container-id-16 pre{padding: 0;}#sk-container-id-16 div.sk-toggleable {background-color: white;}#sk-container-id-16 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-16 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-16 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-16 div.sk-estimator:hover 
label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-16 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-16 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-16 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-16 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-16 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-16 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-16 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-16 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-16 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-16 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-16 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-16 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-16 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-16 div.sk-item {position: relative;z-index: 1;}#sk-container-id-16 div.sk-parallel {display: flex;align-items: stretch;justify-content: 
center;background-color: white;position: relative;}#sk-container-id-16 div.sk-item::before, #sk-container-id-16 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-16 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-16 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-16 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-16 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-16 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-16 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-16 div.sk-label-container {text-align: center;}#sk-container-id-16 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-16 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-16\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=0.5, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=0.1, gpu_id=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.1, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=3, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", + " predictor=None, random_state=42, ...)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
<br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-32\" type=\"checkbox\" checked><label for=\"sk-estimator-id-32\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">XGBClassifier</label><div class=\"sk-toggleable__content\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=0.5, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=0.1, gpu_id=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.1, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=3, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", + " predictor=None, random_state=42, ...)</pre></div></div></div></div></div>" + ], + "text/plain": [ + "XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=0.5, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=0.1, gpu_id=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.1, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=3, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " n_estimators=100, n_jobs=None, num_parallel_tree=None,\n", + " predictor=None, random_state=42, ...)" + ] + }, + "execution_count": 267, + "metadata": {}, + "output_type": 
"execute_result"
+    }
+   ],
+   "source": [
+    "model_xgb = grid.best_estimator_\n",
+    "model_xgb.fit(X_train,y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 268,
+   "id": "9ed43446",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# model_xgb was already fitted on (X_train, y_train) in the previous cell,\n",
+    "# so it is used directly instead of being re-fitted a second time.\n",
+    "y_proba = model_xgb.predict_proba(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 269,
+   "id": "c0193b78",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "classes = model_xgb.classes_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 270,
+   "id": "d723c69f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0, 1])"
+      ]
+     },
+     "execution_count": 270,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "classes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6603d82c",
+   "metadata": {},
+   "source": [
+    "# ROC curve"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 279,
+   "id": "0e2a2694",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.base import clone\n",
+    "from sklearn.model_selection import StratifiedKFold\n",
+    "from sklearn.feature_selection import SelectKBest, f_classif\n",
+    "from sklearn.metrics import auc\n",
+    "\n",
+    "\n",
+    "def roc(X_train, y_train, model, label):\n",
+    "    \"\"\"Plot cross-validated ROC curves for ``model`` on the given data.\n",
+    "\n",
+    "    The data are split with 6-fold stratified cross-validation. For every\n",
+    "    fold a clone of ``model`` is fitted on the fold's training part and its\n",
+    "    ROC curve on the held-out part is drawn. The mean curve, a +/- 1 standard\n",
+    "    deviation band and the chance diagonal are added to the same axes.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    X_train : pandas object supporting ``.iloc``\n",
+    "        Feature matrix that is cross-validated (despite the name, any split\n",
+    "        can be passed in).\n",
+    "    y_train : pandas object supporting ``.iloc``\n",
+    "        Labels belonging to the rows of ``X_train``.\n",
+    "    model : scikit-learn compatible classifier\n",
+    "        Template estimator. It is cloned, so the object passed in is never\n",
+    "        re-fitted and keeps whatever fit it already has.\n",
+    "    label : str\n",
+    "        Title of the figure.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    None\n",
+    "        The figure is shown with ``plt.show()``.\n",
+    "    \"\"\"\n",
+    "    cv = StratifiedKFold(n_splits=6)\n",
+    "    # Fit a clone: fitting ``model`` itself would silently replace the caller's\n",
+    "    # trained model with one trained on the last cross-validation fold.\n",
+    "    classifier = clone(model)\n",
+    "    tprs = []\n",
+    "    aucs = []\n",
+    "    mean_fpr = np.linspace(0, 1, 100)\n",
+    "\n",
+    "    fig, ax = plt.subplots(figsize=(6, 6))\n",
+    "    for fold, (train, test) in enumerate(cv.split(X_train, y_train)):\n",
+    "        classifier.fit(X_train.iloc[train], y_train.iloc[train])\n",
+    "        viz = RocCurveDisplay.from_estimator(\n",
+    "            classifier,\n",
+    "            X_train.iloc[test],\n",
+    "            y_train.iloc[test],\n",
+    "            name=f\"ROC fold {fold}\",\n",
+    "            alpha=0.3,\n",
+    "            lw=1,\n",
+    "            ax=ax,\n",
+    "        )\n",
+    "        # Resample each fold's curve onto the shared FPR grid so the folds\n",
+    "        # can be averaged point by point; pin the curve to start at (0, 0).\n",
+    "        interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)\n",
+    "        interp_tpr[0] = 0.0\n",
+    "        tprs.append(interp_tpr)\n",
+    "        aucs.append(viz.roc_auc)\n",
+    "    ax.plot([0, 1], [0, 1], \"k--\", label=\"chance level (AUC = 0.5)\")\n",
+    "\n",
+    "    mean_tpr = np.mean(tprs, axis=0)\n",
+    "    mean_tpr[-1] = 1.0  # pin the mean curve to end at (1, 1)\n",
+    "    mean_auc = auc(mean_fpr, mean_tpr)\n",
+    "    std_auc = np.std(aucs)\n",
+    "    ax.plot(\n",
+    "        mean_fpr,\n",
+    "        mean_tpr,\n",
+    "        color=\"b\",\n",
+    "        label=r\"Mean ROC (AUC = %0.2f $\\pm$ %0.2f)\" % (mean_auc, std_auc),\n",
+    "        lw=2,\n",
+    "        alpha=0.8,\n",
+    "    )\n",
+    "\n",
+    "    # Shade +/- 1 standard deviation around the mean curve, clipped to [0, 1].\n",
+    "    std_tpr = np.std(tprs, axis=0)\n",
+    "    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)\n",
+    "    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)\n",
+    "    ax.fill_between(\n",
+    "        mean_fpr,\n",
+    "        tprs_lower,\n",
+    "        tprs_upper,\n",
+    "        color=\"grey\",\n",
+    "        alpha=0.2,\n",
+    "        label=r\"$\\pm$ 1 std. dev.\",\n",
+    "    )\n",
+    "\n",
+    "    ax.set(\n",
+    "        xlim=[-0.05, 1.05],\n",
+    "        ylim=[-0.05, 1.05],\n",
+    "        xlabel=\"False Positive Rate\",\n",
+    "        ylabel=\"True Positive Rate\",\n",
+    "        title=label,\n",
+    "    )\n",
+    "    ax.axis(\"square\")\n",
+    "    ax.legend(loc=\"lower right\")\n",
+    "    plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 280,
+   "id": "d4cc8e6d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 600x600 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "model = model_xgb\n",
+    "label=\"ROC curve of training data\"\n",
+    "roc(X_train,y_train,model,label)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 281,
+   "id": "1199e2e4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 600x600 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# NOTE: roc() cross-validates inside the data it receives, so this figure shows\n",
+    "# models trained on folds of the test split, not the model fitted on X_train.\n",
+    "label=\"ROC curve of testing data\"\n",
+    "roc(X_test,y_test,model,label)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "becb1b58",
+   "metadata": {},
+   "source": [
+    "# Miscellaneous:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 282,
+   "id": "b76a44fc",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+
"text/plain": [
+       "<Figure size 640x480 with 2 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Confusion matrix of model_xgb on the test split\n",
+    "from sklearn import metrics\n",
+    "y_test_pred_xgb = model_xgb.predict(X_test)\n",
+    "confusion_matrix_test = metrics.confusion_matrix(y_test, y_test_pred_xgb)\n",
+    "cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix_test)\n",
+    "cm_display.plot()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 283,
+   "id": "416c4ba4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 640x480 with 2 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Confusion matrix of model_xgb on the training split\n",
+    "# (relies on `metrics` imported in the previous cell)\n",
+    "y_train_pred_xgb = model_xgb.predict(X_train)\n",
+    "confusion_matrix_train = metrics.confusion_matrix(y_train, y_train_pred_xgb)\n",
+    "cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix_train)\n",
+    "cm_display.plot()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bee03388",
+   "metadata": {},
+   "source": [
+    "# Feature importance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 284,
+   "id": "6688e037",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Per-feature importance scores of the fitted XGBoost model.\n",
+    "important_feat = model_xgb.feature_importances_\n",
+    "# Feature indices ordered from most to least important; a stable sort keeps\n",
+    "# the order of tied scores reproducible between runs.\n",
+    "idx = important_feat.argsort(kind=\"stable\")[::-1]\n",
+    "# Keep at most 50 features and only those with a non-zero score: zero-score\n",
+    "# features are all tied, so their position in the ranking carries no information.\n",
+    "idx = idx[important_feat[idx] > 0][:50]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 285,
+   "id": "4e6a7ea1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ 66, 65, 84, 94, 140, 32, 169, 137, 23, 212, 10, 166, 13,\n",
+       " 36, 56, 126, 48, 57, 42, 208, 37, 113, 29, 160, 22, 96,\n",
+       " 162, 229, 189, 101, 104, 127, 135, 21, 79, 78, 77, 76, 75,\n",
+       " 74, 73, 72, 202, 71, 69, 68, 67, 64, 63, 62])"
+      ]
+     },
+     "execution_count": 285,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "idx"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 286,
+   "id": "f2101fe1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df1 = X.T"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 287,
+   "id": "2cbf1166",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "top_met = df1.iloc[idx]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 288,
+   "id": "2370b2df",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['hsa-miR-18b-5p', 'hsa-miR-18a-5p', 'hsa-miR-21-5p', 'hsa-miR-25-3p',\n",
+       " 'hsa-miR-424-5p', 'hsa-miR-130b-3p', 'hsa-miR-455-3p', 'hsa-miR-378i',\n",
+       " 'hsa-miR-1268a', 'hsa-miR-93-5p', 'hsa-miR-106b-5p', 'hsa-miR-451a',\n",
+       " 'hsa-miR-10b-5p', 'hsa-miR-140-3p', 'hsa-miR-15b-5p', 'hsa-miR-3651',\n",
+       " 'hsa-miR-150-5p', 'hsa-miR-16-2-3p', 'hsa-miR-145-5p', 'hsa-miR-7-5p',\n",
+       " 'hsa-miR-140-5p', 'hsa-miR-3198', 'hsa-miR-1290', 'hsa-miR-4465',\n",
+       " 'hsa-miR-126-3p', 'hsa-miR-26b-5p', 'hsa-miR-4497', 'mr_1',\n",
+       " 'hsa-miR-497-5p', 'hsa-miR-29c-3p', 'hsa-miR-30a-5p', 'hsa-miR-3656',\n",
+       " 'hsa-miR-378a-3p', 'hsa-miR-125b-5p', 'hsa-miR-200c-3p',\n",
+       " 'hsa-miR-200b-3p', 'hsa-miR-19b-3p', 'hsa-miR-19a-3p',\n",
+       " 'hsa-miR-199a-5p', 'hsa-miR-199a-3p', 'hsa-miR-1973', 'hsa-miR-197-5p',\n",
+       " 'hsa-miR-642a-3p', 'hsa-miR-197-3p', 'hsa-miR-193b-3p',\n",
+       " 'hsa-miR-193a-5p', 'hsa-miR-1915-3p', 'hsa-miR-188-5p',\n",
+       " 'hsa-miR-185-5p', 'hsa-miR-181b-5p'],\n",
+       " dtype='object')"
+      ]
+     },
+     "execution_count": 288,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "top_met.index"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c9d668aa",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "03b3840d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id":
"6eddc5ba", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} -- GitLab