diff --git a/Machine Learning/DS_miRNA_limma_dataset_xgb_final-F.ipynb b/Machine Learning/DS_miRNA_limma_dataset_xgb_final-F.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..e19cab4d08e4dff8eb9df8eedce78d1e53ed6add
--- /dev/null
+++ b/Machine Learning/DS_miRNA_limma_dataset_xgb_final-F.ipynb	
@@ -0,0 +1,1504 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "f097ad55",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "import pandas as pd\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "#from sklearn.model_selection import cross_val_score\n",
+    "#from sklearn.metrics import accuracy_score\n",
+    "#import sklearn.metrics as metrics\n",
+    "#from sklearn.metrics import auc\n",
+    "from sklearn.metrics import RocCurveDisplay\n",
+    "#from sklearn.model_selection import KFold\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "from imblearn.over_sampling import SMOTE\n",
+    "from sklearn.linear_model import Lasso\n",
+    "import xgboost as xgb\n",
+    "from sklearn.model_selection import GridSearchCV\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "#np.random.seed(7)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "73b6611a",
+   "metadata": {},
+   "source": [
+    "# Data Preprocessing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "0eeb7a35",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\"DS/miRNA_DS_preprocessed_data.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "id": "6e7836e1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(230, 239)"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "683b63ce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df.T"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "id": "2e78017d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Promote the first row to column headers, drop that row, and move the old row\n",
+    "# labels (the GSM ids in the displayed output) into an ordinary 'index' column.\n",
+    "# Written as one chained expression so no step mutates the frame in place.\n",
+    "df = df.rename(columns=df.iloc[0]).drop(index=df.index[0]).reset_index()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "1647a959",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>dmr_3</th>\n",
+       "      <th>dmr_31a</th>\n",
+       "      <th>dmr_6</th>\n",
+       "      <th>ebv-miR-BART13</th>\n",
+       "      <th>hsa-let-7c</th>\n",
+       "      <th>hsa-let-7d-5p</th>\n",
+       "      <th>hsa-let-7i-5p</th>\n",
+       "      <th>hsa-miR-100-5p</th>\n",
+       "      <th>hsa-miR-101-3p</th>\n",
+       "      <th>...</th>\n",
+       "      <th>hsv2-miR-H24</th>\n",
+       "      <th>hsv2-miR-H25</th>\n",
+       "      <th>hsv2-miR-H6</th>\n",
+       "      <th>hur_1</th>\n",
+       "      <th>hur_2</th>\n",
+       "      <th>hur_4</th>\n",
+       "      <th>hur_5</th>\n",
+       "      <th>hur_6</th>\n",
+       "      <th>miRNABrightCorner30</th>\n",
+       "      <th>mr_1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>GSM1069774</td>\n",
+       "      <td>0.732675</td>\n",
+       "      <td>-0.242559</td>\n",
+       "      <td>0.577801</td>\n",
+       "      <td>-4.469532</td>\n",
+       "      <td>1.195899</td>\n",
+       "      <td>-0.334742</td>\n",
+       "      <td>0.89199</td>\n",
+       "      <td>-2.089223</td>\n",
+       "      <td>-2.757097</td>\n",
+       "      <td>...</td>\n",
+       "      <td>-3.956004</td>\n",
+       "      <td>-3.936689</td>\n",
+       "      <td>-4.099346</td>\n",
+       "      <td>6.98856</td>\n",
+       "      <td>7.041557</td>\n",
+       "      <td>3.822267</td>\n",
+       "      <td>-2.268209</td>\n",
+       "      <td>5.114399</td>\n",
+       "      <td>2.017444</td>\n",
+       "      <td>1.640437</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>GSM1069775</td>\n",
+       "      <td>0.249772</td>\n",
+       "      <td>-0.655514</td>\n",
+       "      <td>0.104933</td>\n",
+       "      <td>-5.209572</td>\n",
+       "      <td>0.498366</td>\n",
+       "      <td>-0.194772</td>\n",
+       "      <td>0.637863</td>\n",
+       "      <td>-2.357572</td>\n",
+       "      <td>-2.196884</td>\n",
+       "      <td>...</td>\n",
+       "      <td>-4.334103</td>\n",
+       "      <td>-4.561624</td>\n",
+       "      <td>-4.719714</td>\n",
+       "      <td>6.774479</td>\n",
+       "      <td>6.862654</td>\n",
+       "      <td>3.529789</td>\n",
+       "      <td>-2.656642</td>\n",
+       "      <td>4.327117</td>\n",
+       "      <td>2.022346</td>\n",
+       "      <td>0.79426</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>GSM1069776</td>\n",
+       "      <td>0.400779</td>\n",
+       "      <td>-0.597444</td>\n",
+       "      <td>0.232702</td>\n",
+       "      <td>-4.952808</td>\n",
+       "      <td>1.081166</td>\n",
+       "      <td>0.249982</td>\n",
+       "      <td>1.45018</td>\n",
+       "      <td>-1.138559</td>\n",
+       "      <td>-1.802774</td>\n",
+       "      <td>...</td>\n",
+       "      <td>-4.550077</td>\n",
+       "      <td>-4.40729</td>\n",
+       "      <td>-4.621278</td>\n",
+       "      <td>6.808404</td>\n",
+       "      <td>6.75867</td>\n",
+       "      <td>3.496675</td>\n",
+       "      <td>-2.676555</td>\n",
+       "      <td>4.616284</td>\n",
+       "      <td>1.498011</td>\n",
+       "      <td>1.584544</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>GSM1069777</td>\n",
+       "      <td>0.380263</td>\n",
+       "      <td>-0.900491</td>\n",
+       "      <td>0.243207</td>\n",
+       "      <td>-4.892073</td>\n",
+       "      <td>-0.023958</td>\n",
+       "      <td>-0.980435</td>\n",
+       "      <td>1.071857</td>\n",
+       "      <td>-2.077406</td>\n",
+       "      <td>-2.11406</td>\n",
+       "      <td>...</td>\n",
+       "      <td>-4.018911</td>\n",
+       "      <td>-4.203106</td>\n",
+       "      <td>-3.938707</td>\n",
+       "      <td>6.524773</td>\n",
+       "      <td>6.497959</td>\n",
+       "      <td>3.541502</td>\n",
+       "      <td>-3.073553</td>\n",
+       "      <td>4.581648</td>\n",
+       "      <td>0.789822</td>\n",
+       "      <td>1.255367</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>GSM1069778</td>\n",
+       "      <td>0.422207</td>\n",
+       "      <td>-0.414831</td>\n",
+       "      <td>-0.000781</td>\n",
+       "      <td>-5.139127</td>\n",
+       "      <td>1.077485</td>\n",
+       "      <td>-0.684875</td>\n",
+       "      <td>0.724751</td>\n",
+       "      <td>-0.689096</td>\n",
+       "      <td>-1.182558</td>\n",
+       "      <td>...</td>\n",
+       "      <td>-3.690971</td>\n",
+       "      <td>-4.332452</td>\n",
+       "      <td>-4.178727</td>\n",
+       "      <td>6.562608</td>\n",
+       "      <td>6.529399</td>\n",
+       "      <td>3.305132</td>\n",
+       "      <td>-2.964948</td>\n",
+       "      <td>4.487481</td>\n",
+       "      <td>1.219583</td>\n",
+       "      <td>0.951615</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>233</th>\n",
+       "      <td>GSM1070007</td>\n",
+       "      <td>0.98797</td>\n",
+       "      <td>-0.118186</td>\n",
+       "      <td>0.750199</td>\n",
+       "      <td>-4.572984</td>\n",
+       "      <td>0.696251</td>\n",
+       "      <td>-1.089669</td>\n",
+       "      <td>0.826</td>\n",
+       "      <td>-1.604393</td>\n",
+       "      <td>-2.87334</td>\n",
+       "      <td>...</td>\n",
+       "      <td>-2.163581</td>\n",
+       "      <td>-2.15805</td>\n",
+       "      <td>-2.302647</td>\n",
+       "      <td>7.093144</td>\n",
+       "      <td>7.150126</td>\n",
+       "      <td>3.899704</td>\n",
+       "      <td>-2.954284</td>\n",
+       "      <td>5.505105</td>\n",
+       "      <td>2.457963</td>\n",
+       "      <td>2.142301</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>234</th>\n",
+       "      <td>GSM1070008</td>\n",
+       "      <td>-0.194781</td>\n",
+       "      <td>-0.710519</td>\n",
+       "      <td>-0.700226</td>\n",
+       "      <td>-5.651293</td>\n",
+       "      <td>0.742722</td>\n",
+       "      <td>-0.964527</td>\n",
+       "      <td>0.570816</td>\n",
+       "      <td>-1.046029</td>\n",
+       "      <td>-1.840615</td>\n",
+       "      <td>...</td>\n",
+       "      <td>-4.507365</td>\n",
+       "      <td>-4.23831</td>\n",
+       "      <td>-4.63219</td>\n",
+       "      <td>6.18658</td>\n",
+       "      <td>6.232722</td>\n",
+       "      <td>2.788619</td>\n",
+       "      <td>-3.103706</td>\n",
+       "      <td>4.340513</td>\n",
+       "      <td>0.232713</td>\n",
+       "      <td>1.067806</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>235</th>\n",
+       "      <td>GSM1070009</td>\n",
+       "      <td>0.21218</td>\n",
+       "      <td>-0.284657</td>\n",
+       "      <td>-0.32472</td>\n",
+       "      <td>-4.800142</td>\n",
+       "      <td>1.0062</td>\n",
+       "      <td>-0.141699</td>\n",
+       "      <td>0.80704</td>\n",
+       "      <td>-0.993146</td>\n",
+       "      <td>-0.823621</td>\n",
+       "      <td>...</td>\n",
+       "      <td>-2.737709</td>\n",
+       "      <td>-2.644713</td>\n",
+       "      <td>-3.253632</td>\n",
+       "      <td>6.505956</td>\n",
+       "      <td>6.548781</td>\n",
+       "      <td>3.12575</td>\n",
+       "      <td>-2.917537</td>\n",
+       "      <td>4.838599</td>\n",
+       "      <td>0.863574</td>\n",
+       "      <td>1.203499</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>236</th>\n",
+       "      <td>GSM1070010</td>\n",
+       "      <td>0.330997</td>\n",
+       "      <td>-0.19446</td>\n",
+       "      <td>-0.206405</td>\n",
+       "      <td>-4.840442</td>\n",
+       "      <td>1.521159</td>\n",
+       "      <td>-0.424901</td>\n",
+       "      <td>0.886358</td>\n",
+       "      <td>-0.031455</td>\n",
+       "      <td>-1.584939</td>\n",
+       "      <td>...</td>\n",
+       "      <td>-3.292034</td>\n",
+       "      <td>-2.941633</td>\n",
+       "      <td>-3.939222</td>\n",
+       "      <td>6.790132</td>\n",
+       "      <td>6.829164</td>\n",
+       "      <td>3.365475</td>\n",
+       "      <td>-2.736411</td>\n",
+       "      <td>5.185601</td>\n",
+       "      <td>0.846454</td>\n",
+       "      <td>1.604729</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>237</th>\n",
+       "      <td>GSM1070011</td>\n",
+       "      <td>0.474815</td>\n",
+       "      <td>0.043697</td>\n",
+       "      <td>-0.102511</td>\n",
+       "      <td>-4.849285</td>\n",
+       "      <td>1.239637</td>\n",
+       "      <td>-0.704124</td>\n",
+       "      <td>0.698355</td>\n",
+       "      <td>-0.414715</td>\n",
+       "      <td>-1.721427</td>\n",
+       "      <td>...</td>\n",
+       "      <td>-3.378909</td>\n",
+       "      <td>-2.909732</td>\n",
+       "      <td>-3.510667</td>\n",
+       "      <td>6.80237</td>\n",
+       "      <td>6.784016</td>\n",
+       "      <td>3.514036</td>\n",
+       "      <td>-2.931018</td>\n",
+       "      <td>4.798139</td>\n",
+       "      <td>2.08952</td>\n",
+       "      <td>1.597958</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>238 rows × 231 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          index     dmr_3   dmr_31a     dmr_6 ebv-miR-BART13 hsa-let-7c  \\\n",
+       "0    GSM1069774  0.732675 -0.242559  0.577801      -4.469532   1.195899   \n",
+       "1    GSM1069775  0.249772 -0.655514  0.104933      -5.209572   0.498366   \n",
+       "2    GSM1069776  0.400779 -0.597444  0.232702      -4.952808   1.081166   \n",
+       "3    GSM1069777  0.380263 -0.900491  0.243207      -4.892073  -0.023958   \n",
+       "4    GSM1069778  0.422207 -0.414831 -0.000781      -5.139127   1.077485   \n",
+       "..          ...       ...       ...       ...            ...        ...   \n",
+       "233  GSM1070007   0.98797 -0.118186  0.750199      -4.572984   0.696251   \n",
+       "234  GSM1070008 -0.194781 -0.710519 -0.700226      -5.651293   0.742722   \n",
+       "235  GSM1070009   0.21218 -0.284657  -0.32472      -4.800142     1.0062   \n",
+       "236  GSM1070010  0.330997  -0.19446 -0.206405      -4.840442   1.521159   \n",
+       "237  GSM1070011  0.474815  0.043697 -0.102511      -4.849285   1.239637   \n",
+       "\n",
+       "    hsa-let-7d-5p hsa-let-7i-5p hsa-miR-100-5p hsa-miR-101-3p  ...  \\\n",
+       "0       -0.334742       0.89199      -2.089223      -2.757097  ...   \n",
+       "1       -0.194772      0.637863      -2.357572      -2.196884  ...   \n",
+       "2        0.249982       1.45018      -1.138559      -1.802774  ...   \n",
+       "3       -0.980435      1.071857      -2.077406       -2.11406  ...   \n",
+       "4       -0.684875      0.724751      -0.689096      -1.182558  ...   \n",
+       "..            ...           ...            ...            ...  ...   \n",
+       "233     -1.089669         0.826      -1.604393       -2.87334  ...   \n",
+       "234     -0.964527      0.570816      -1.046029      -1.840615  ...   \n",
+       "235     -0.141699       0.80704      -0.993146      -0.823621  ...   \n",
+       "236     -0.424901      0.886358      -0.031455      -1.584939  ...   \n",
+       "237     -0.704124      0.698355      -0.414715      -1.721427  ...   \n",
+       "\n",
+       "    hsv2-miR-H24 hsv2-miR-H25 hsv2-miR-H6     hur_1     hur_2     hur_4  \\\n",
+       "0      -3.956004    -3.936689   -4.099346   6.98856  7.041557  3.822267   \n",
+       "1      -4.334103    -4.561624   -4.719714  6.774479  6.862654  3.529789   \n",
+       "2      -4.550077     -4.40729   -4.621278  6.808404   6.75867  3.496675   \n",
+       "3      -4.018911    -4.203106   -3.938707  6.524773  6.497959  3.541502   \n",
+       "4      -3.690971    -4.332452   -4.178727  6.562608  6.529399  3.305132   \n",
+       "..           ...          ...         ...       ...       ...       ...   \n",
+       "233    -2.163581     -2.15805   -2.302647  7.093144  7.150126  3.899704   \n",
+       "234    -4.507365     -4.23831    -4.63219   6.18658  6.232722  2.788619   \n",
+       "235    -2.737709    -2.644713   -3.253632  6.505956  6.548781   3.12575   \n",
+       "236    -3.292034    -2.941633   -3.939222  6.790132  6.829164  3.365475   \n",
+       "237    -3.378909    -2.909732   -3.510667   6.80237  6.784016  3.514036   \n",
+       "\n",
+       "        hur_5     hur_6 miRNABrightCorner30      mr_1  \n",
+       "0   -2.268209  5.114399            2.017444  1.640437  \n",
+       "1   -2.656642  4.327117            2.022346   0.79426  \n",
+       "2   -2.676555  4.616284            1.498011  1.584544  \n",
+       "3   -3.073553  4.581648            0.789822  1.255367  \n",
+       "4   -2.964948  4.487481            1.219583  0.951615  \n",
+       "..        ...       ...                 ...       ...  \n",
+       "233 -2.954284  5.505105            2.457963  2.142301  \n",
+       "234 -3.103706  4.340513            0.232713  1.067806  \n",
+       "235 -2.917537  4.838599            0.863574  1.203499  \n",
+       "236 -2.736411  5.185601            0.846454  1.604729  \n",
+       "237 -2.931018  4.798139             2.08952  1.597958  \n",
+       "\n",
+       "[238 rows x 231 columns]"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "id": "4c50c510",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metadata = pd.read_csv(\"DS/miRNA_DS_metadata_col_info.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "id": "6730cf89",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df= df.merge(metadata, left_on=\"index\", right_on= \"Unnamed: 0\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "id": "7a8ad8ad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df['title0'] = df['title0'].replace('(?i)mucosa|normal|healthy', 0, regex=True)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "id": "a8cf8643",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df['title0'] = df['title0'].replace('(?i)Tumor|Cancer|carcinoma', 1, regex=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "id": "5c852a3f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "title0\n",
+       "1    119\n",
+       "0    119\n",
+       "Name: count, dtype: int64"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df['title0'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "id": "f5d203aa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df[pd.to_numeric(df['title0'], errors='coerce').notnull()]#remove all non-numeric data from the column."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "id": "523bdaa6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df.drop(columns=['index', 'Unnamed: 0'])  # both sample-id columns are no longer needed after the merge"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "id": "46a6fb36",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df= df.rename(columns={\"title0\": \"index\"})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "id": "e26f88c5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "index\n",
+       "1    119\n",
+       "0    119\n",
+       "Name: count, dtype: int64"
+      ]
+     },
+     "execution_count": 55,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df['index'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "id": "fbaf2507",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df= df.apply(pd.to_numeric)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "id": "f3f7adb5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "index\n",
+       "1    119\n",
+       "0    119\n",
+       "Name: count, dtype: int64"
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df['index'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "id": "6a50f416",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y=df['index']  # target: the 0/1 class label column\n",
+    "X=df.drop(columns=\"index\")  # features: every remaining column"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "id": "e644ab0e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y=y.astype('int')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6cee6462",
+   "metadata": {},
+   "source": [
+    "# Train/test split"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "id": "1da48142",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# split data into training and testing data-sets\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=7)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "id": "129430e6",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(index\n",
+       " 0    30\n",
+       " 1    30\n",
+       " Name: count, dtype: int64,\n",
+       " index\n",
+       " 0    89\n",
+       " 1    89\n",
+       " Name: count, dtype: int64)"
+      ]
+     },
+     "execution_count": 63,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y_test.value_counts(),y_train.value_counts()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1cfe2a06",
+   "metadata": {},
+   "source": [
+    "# Cross validation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "id": "d3550b5e",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fitting 5 folds for each of 36 candidates, totalling 180 fits\n",
+      "[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time=   0.1s\n",
+      "[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.1s\n",
+      "[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.1s\n",
+      "[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.1s\n",
+      "[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.3s\n",
+      "[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.3s\n",
+      "[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=1.000 total time=   0.1s\n",
+      "[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=1.000 total time=   0.1s\n",
+      "[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.1s\n",
+      "[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.1s\n",
+      "[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.1s\n",
+      "[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.2s\n",
+      "[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.944 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.889 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.2s\n",
+      "[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.943 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.4s\n",
+      "[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=0.917 total time=   0.3s\n",
+      "[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.3s\n",
+      "[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.3s\n",
+      "[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=0.943 total time=   0.4s\n",
+      "[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.944 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.889 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.2s\n",
+      "[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.943 total time=   0.1s\n",
+      "[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.3s\n",
+      "[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=0.917 total time=   0.3s\n",
+      "[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.3s\n",
+      "[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.4s\n",
+      "[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=0.943 total time=   0.3s\n",
+      "[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.944 total time=   0.1s\n",
+      "[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.889 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.1s\n",
+      "[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.943 total time=   0.1s\n",
+      "[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.944 total time=   0.4s\n",
+      "[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.889 total time=   0.3s\n",
+      "[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.4s\n",
+      "[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.3s\n",
+      "[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.943 total time=   0.3s\n",
+      "[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.944 total time=   0.1s\n",
+      "[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.889 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.1s\n",
+      "[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.943 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.944 total time=   0.4s\n",
+      "[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.889 total time=   0.4s\n",
+      "[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.4s\n",
+      "[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.4s\n",
+      "[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.943 total time=   0.4s\n",
+      "[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time=   0.1s\n",
+      "[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time=   0.1s\n",
+      "[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.2s\n",
+      "[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.1s\n",
+      "[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.1s\n",
+      "[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.2s\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.3s\n",
+      "[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=1.000 total time=   0.1s\n",
+      "[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=1.000 total time=   0.1s\n",
+      "[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.1s\n",
+      "[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.1s\n",
+      "[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.1s\n",
+      "[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.2s\n",
+      "[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.944 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.917 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.2s\n",
+      "[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.943 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.3s\n",
+      "[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=0.917 total time=   0.3s\n",
+      "[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.4s\n",
+      "[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.4s\n",
+      "[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=0.943 total time=   0.4s\n",
+      "[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.944 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.917 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.2s\n",
+      "[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.943 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.3s\n",
+      "[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=0.917 total time=   0.3s\n",
+      "[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.4s\n",
+      "[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.3s\n",
+      "[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=0.943 total time=   0.3s\n",
+      "[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.944 total time=   0.1s\n",
+      "[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.889 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.3s\n",
+      "[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.943 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.944 total time=   0.4s\n",
+      "[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.889 total time=   0.4s\n",
+      "[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.3s\n",
+      "[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.3s\n",
+      "[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.943 total time=   0.3s\n",
+      "[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.944 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.889 total time=   0.1s\n",
+      "[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.2s\n",
+      "[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.943 total time=   0.1s\n",
+      "[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.944 total time=   0.3s\n",
+      "[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.889 total time=   0.4s\n",
+      "[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.4s\n",
+      "[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.3s\n",
+      "[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.943 total time=   0.4s\n",
+      "[CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time=   0.1s\n",
+      "[CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=1.000 total time=   0.1s\n",
+      "[CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.2s\n",
+      "[CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.2s\n",
+      "[CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=1.000 total time=   0.1s\n",
+      "[CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=1.000 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.1s\n",
+      "[CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.1s\n",
+      "[CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.1s\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.2s\n",
+      "[CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.944 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.889 total time=   0.1s\n",
+      "[CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.2s\n",
+      "[CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0;, score=0.943 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.4s\n",
+      "[CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=0.917 total time=   0.3s\n",
+      "[CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.3s\n",
+      "[CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.3s\n",
+      "[CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0;, score=0.943 total time=   0.3s\n",
+      "[CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.944 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.889 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.1s\n",
+      "[CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0;, score=0.943 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=1.000 total time=   0.4s\n",
+      "[CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=0.917 total time=   0.4s\n",
+      "[CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.4s\n",
+      "[CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.4s\n",
+      "[CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0;, score=0.943 total time=   0.4s\n",
+      "[CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.944 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.889 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.2s\n",
+      "[CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1.0;, score=0.943 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.944 total time=   0.3s\n",
+      "[CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.889 total time=   0.3s\n",
+      "[CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.4s\n",
+      "[CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.4s\n",
+      "[CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1.0;, score=0.943 total time=   0.3s\n",
+      "[CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.944 total time=   0.2s\n",
+      "[CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.889 total time=   0.2s\n",
+      "[CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.972 total time=   0.2s\n",
+      "[CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.971 total time=   0.2s\n",
+      "[CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1.0;, score=0.943 total time=   0.2s\n",
+      "[CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.944 total time=   0.4s\n",
+      "[CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.889 total time=   0.3s\n",
+      "[CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.972 total time=   0.3s\n",
+      "[CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.971 total time=   0.4s\n",
+      "[CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1.0;, score=0.943 total time=   0.4s\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<style>#sk-container-id-3 {color: black;background-color: white;}#sk-container-id-3 pre{padding: 0;}#sk-container-id-3 div.sk-toggleable {background-color: white;}#sk-container-id-3 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-3 label.sk-toggleable__label-arrow:before {content: \"â–¸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-3 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-3 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-3 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"â–¾\";}#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-3 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-3 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-3 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-3 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-3 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-3 div.sk-item {position: relative;z-index: 1;}#sk-container-id-3 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-3 div.sk-item::before, #sk-container-id-3 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-3 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-3 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-3 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-3 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-3 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-3 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-3 div.sk-label-container {text-align: center;}#sk-container-id-3 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-3 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(estimator=XGBClassifier(base_score=None, booster=None,\n",
+       "                                     callbacks=None, colsample_bylevel=None,\n",
+       "                                     colsample_bynode=None,\n",
+       "                                     colsample_bytree=None,\n",
+       "                                     early_stopping_rounds=None,\n",
+       "                                     enable_categorical=False, eval_metric=None,\n",
+       "                                     feature_types=None, gamma=None,\n",
+       "                                     gpu_id=None, grow_policy=None,\n",
+       "                                     importance_type=None,\n",
+       "                                     interaction_constraints=None,\n",
+       "                                     learning_rate=None, max_b...\n",
+       "                                     max_cat_to_onehot=None,\n",
+       "                                     max_delta_step=None, max_depth=None,\n",
+       "                                     max_leaves=None, min_child_weight=None,\n",
+       "                                     missing=nan, monotone_constraints=None,\n",
+       "                                     n_estimators=100, n_jobs=None,\n",
+       "                                     num_parallel_tree=None, predictor=None,\n",
+       "                                     random_state=42, ...),\n",
+       "             param_grid={&#x27;gamma&#x27;: [0.1, 0.01, 0.001],\n",
+       "                         &#x27;learning_rate&#x27;: [0.1, 0.01, 0.001],\n",
+       "                         &#x27;max_depth&#x27;: [3, 5], &#x27;n_estimators&#x27;: [100, 200],\n",
+       "                         &#x27;subsample&#x27;: [1.0]},\n",
+       "             verbose=3)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">GridSearchCV</label><div class=\"sk-toggleable__content\"><pre>GridSearchCV(estimator=XGBClassifier(base_score=None, booster=None,\n",
+       "                                     callbacks=None, colsample_bylevel=None,\n",
+       "                                     colsample_bynode=None,\n",
+       "                                     colsample_bytree=None,\n",
+       "                                     early_stopping_rounds=None,\n",
+       "                                     enable_categorical=False, eval_metric=None,\n",
+       "                                     feature_types=None, gamma=None,\n",
+       "                                     gpu_id=None, grow_policy=None,\n",
+       "                                     importance_type=None,\n",
+       "                                     interaction_constraints=None,\n",
+       "                                     learning_rate=None, max_b...\n",
+       "                                     max_cat_to_onehot=None,\n",
+       "                                     max_delta_step=None, max_depth=None,\n",
+       "                                     max_leaves=None, min_child_weight=None,\n",
+       "                                     missing=nan, monotone_constraints=None,\n",
+       "                                     n_estimators=100, n_jobs=None,\n",
+       "                                     num_parallel_tree=None, predictor=None,\n",
+       "                                     random_state=42, ...),\n",
+       "             param_grid={&#x27;gamma&#x27;: [0.1, 0.01, 0.001],\n",
+       "                         &#x27;learning_rate&#x27;: [0.1, 0.01, 0.001],\n",
+       "                         &#x27;max_depth&#x27;: [3, 5], &#x27;n_estimators&#x27;: [100, 200],\n",
+       "                         &#x27;subsample&#x27;: [1.0]},\n",
+       "             verbose=3)</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">estimator: XGBClassifier</label><div class=\"sk-toggleable__content\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
+       "              colsample_bylevel=None, colsample_bynode=None,\n",
+       "              colsample_bytree=None, early_stopping_rounds=None,\n",
+       "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
+       "              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
+       "              interaction_constraints=None, learning_rate=None, max_bin=None,\n",
+       "              max_cat_threshold=None, max_cat_to_onehot=None,\n",
+       "              max_delta_step=None, max_depth=None, max_leaves=None,\n",
+       "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+       "              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
+       "              predictor=None, random_state=42, ...)</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">XGBClassifier</label><div class=\"sk-toggleable__content\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
+       "              colsample_bylevel=None, colsample_bynode=None,\n",
+       "              colsample_bytree=None, early_stopping_rounds=None,\n",
+       "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
+       "              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
+       "              interaction_constraints=None, learning_rate=None, max_bin=None,\n",
+       "              max_cat_threshold=None, max_cat_to_onehot=None,\n",
+       "              max_delta_step=None, max_depth=None, max_leaves=None,\n",
+       "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+       "              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
+       "              predictor=None, random_state=42, ...)</pre></div></div></div></div></div></div></div></div></div></div>"
+      ],
+      "text/plain": [
+       "GridSearchCV(estimator=XGBClassifier(base_score=None, booster=None,\n",
+       "                                     callbacks=None, colsample_bylevel=None,\n",
+       "                                     colsample_bynode=None,\n",
+       "                                     colsample_bytree=None,\n",
+       "                                     early_stopping_rounds=None,\n",
+       "                                     enable_categorical=False, eval_metric=None,\n",
+       "                                     feature_types=None, gamma=None,\n",
+       "                                     gpu_id=None, grow_policy=None,\n",
+       "                                     importance_type=None,\n",
+       "                                     interaction_constraints=None,\n",
+       "                                     learning_rate=None, max_b...\n",
+       "                                     max_cat_to_onehot=None,\n",
+       "                                     max_delta_step=None, max_depth=None,\n",
+       "                                     max_leaves=None, min_child_weight=None,\n",
+       "                                     missing=nan, monotone_constraints=None,\n",
+       "                                     n_estimators=100, n_jobs=None,\n",
+       "                                     num_parallel_tree=None, predictor=None,\n",
+       "                                     random_state=42, ...),\n",
+       "             param_grid={'gamma': [0.1, 0.01, 0.001],\n",
+       "                         'learning_rate': [0.1, 0.01, 0.001],\n",
+       "                         'max_depth': [3, 5], 'n_estimators': [100, 200],\n",
+       "                         'subsample': [1.0]},\n",
+       "             verbose=3)"
+      ]
+     },
+     "execution_count": 64,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Hyper-parameter candidates; GridSearchCV tries every combination\n",
+    "# (3 gamma x 3 learning_rate x 2 max_depth x 2 n_estimators x 1 subsample = 36)\n",
+    "param_grid = {\n",
+    "    'gamma': [0.1, 0.01, 0.001],\n",
+    "    'learning_rate': [0.1, 0.01, 0.001],\n",
+    "    'max_depth': [3, 5],\n",
+    "    'n_estimators': [100, 200],\n",
+    "    'subsample': [1.0],\n",
+    "}\n",
+    "\n",
+    "# Base classifier with a fixed seed; the searched parameters are set per candidate\n",
+    "model = xgb.XGBClassifier(random_state=42)\n",
+    "\n",
+    "# verbose=3 logs the score of every fold; refit=True retrains the best candidate afterwards\n",
+    "grid = GridSearchCV(estimator=model, param_grid=param_grid, refit=True, verbose=3)\n",
+    "\n",
+    "# Run the search on the training split; the fitted search object is the cell's output\n",
+    "grid.fit(X_train, y_train)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "id": "556e249c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'gamma': 0.1, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 1.0}\n",
+      "XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
+      "              colsample_bylevel=None, colsample_bynode=None,\n",
+      "              colsample_bytree=None, early_stopping_rounds=None,\n",
+      "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
+      "              gamma=0.1, gpu_id=None, grow_policy=None, importance_type=None,\n",
+      "              interaction_constraints=None, learning_rate=0.1, max_bin=None,\n",
+      "              max_cat_threshold=None, max_cat_to_onehot=None,\n",
+      "              max_delta_step=None, max_depth=3, max_leaves=None,\n",
+      "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+      "              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
+      "              predictor=None, random_state=42, ...)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Show the winning hyper-parameter combination, then the estimator configured with it\n",
+    "print(grid.best_params_, grid.best_estimator_, sep='\\n')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "id": "0686e808",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style>#sk-container-id-4 {color: black;background-color: white;}#sk-container-id-4 pre{padding: 0;}#sk-container-id-4 div.sk-toggleable {background-color: white;}#sk-container-id-4 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-4 label.sk-toggleable__label-arrow:before {content: \"â–¸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-4 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-4 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-4 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"â–¾\";}#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-4 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-4 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-4 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-4 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-4 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-4 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-4 div.sk-item {position: relative;z-index: 1;}#sk-container-id-4 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-4 div.sk-item::before, #sk-container-id-4 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-4 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-4 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-4 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-4 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-4 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-4 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-4 div.sk-label-container {text-align: center;}#sk-container-id-4 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-4 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
+       "              colsample_bylevel=None, colsample_bynode=None,\n",
+       "              colsample_bytree=None, early_stopping_rounds=None,\n",
+       "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
+       "              gamma=0.1, gpu_id=None, grow_policy=None, importance_type=None,\n",
+       "              interaction_constraints=None, learning_rate=0.1, max_bin=None,\n",
+       "              max_cat_threshold=None, max_cat_to_onehot=None,\n",
+       "              max_delta_step=None, max_depth=3, max_leaves=None,\n",
+       "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+       "              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
+       "              predictor=None, random_state=42, ...)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" checked><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">XGBClassifier</label><div class=\"sk-toggleable__content\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
+       "              colsample_bylevel=None, colsample_bynode=None,\n",
+       "              colsample_bytree=None, early_stopping_rounds=None,\n",
+       "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
+       "              gamma=0.1, gpu_id=None, grow_policy=None, importance_type=None,\n",
+       "              interaction_constraints=None, learning_rate=0.1, max_bin=None,\n",
+       "              max_cat_threshold=None, max_cat_to_onehot=None,\n",
+       "              max_delta_step=None, max_depth=3, max_leaves=None,\n",
+       "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+       "              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
+       "              predictor=None, random_state=42, ...)</pre></div></div></div></div></div>"
+      ],
+      "text/plain": [
+       "XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
+       "              colsample_bylevel=None, colsample_bynode=None,\n",
+       "              colsample_bytree=None, early_stopping_rounds=None,\n",
+       "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
+       "              gamma=0.1, gpu_id=None, grow_policy=None, importance_type=None,\n",
+       "              interaction_constraints=None, learning_rate=0.1, max_bin=None,\n",
+       "              max_cat_threshold=None, max_cat_to_onehot=None,\n",
+       "              max_delta_step=None, max_depth=3, max_leaves=None,\n",
+       "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+       "              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
+       "              predictor=None, random_state=42, ...)"
+      ]
+     },
+     "execution_count": 66,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Take the best estimator exposed by `grid` (presumably the fitted grid search\n",
+    "# -- confirm against the cell that builds it) and fit it on the training split.\n",
+    "# The fitted estimator returned by fit() is this cell's displayed value.\n",
+    "model_xgb = grid.best_estimator_\n",
+    "model_xgb.fit(X=X_train, y=y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "id": "ac776bef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# model_xgb was already fitted on (X_train, y_train) in the previous cell,\n",
+    "# so predict class probabilities directly instead of training it a second time.\n",
+    "y_proba = model_xgb.predict_proba(X_test)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3ea57532",
+   "metadata": {},
+   "source": [
+    "# Classification report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "id": "18becbe2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "           0       0.97      0.97      0.97        30\n",
+      "           1       0.97      0.97      0.97        30\n",
+      "\n",
+      "    accuracy                           0.97        60\n",
+      "   macro avg       0.97      0.97      0.97        60\n",
+      "weighted avg       0.97      0.97      0.97        60\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.metrics import classification_report, confusion_matrix\n",
+    "\n",
+    "# Predict the test split through the search object and print the\n",
+    "# per-class precision / recall / F1 summary.\n",
+    "grid_predictions = grid.predict(X_test)\n",
+    "report_text = classification_report(y_true=y_test, y_pred=grid_predictions)\n",
+    "print(report_text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "id": "c0193b78",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Class labels as stored on the fitted classifier (its `classes_` attribute).\n",
+    "classes = model_xgb.classes_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "id": "d723c69f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0, 1])"
+      ]
+     },
+     "execution_count": 70,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Display the class labels held in `classes`.\n",
+    "classes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "id": "4643393d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 640x480 with 2 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Confusion matrix of model_xgb's predictions on the test split.\n",
+    "from sklearn import metrics\n",
+    "\n",
+    "y_test_pred_xgb = model_xgb.predict(X_test)\n",
+    "confusion_matrix_test = metrics.confusion_matrix(y_true=y_test, y_pred=y_test_pred_xgb)\n",
+    "\n",
+    "# Render the matrix as a heat-map; confusion_matrix_test is reused by the next cell.\n",
+    "cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix_test)\n",
+    "cm_display.plot()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "id": "5ad4efb1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy :  0.9666666666666667\n",
+      "Sensitivity :  0.9666666666666667\n",
+      "Specificity :  0.9666666666666667\n"
+     ]
+    }
+   ],
+   "source": [
+    "# scikit-learn lays a binary confusion matrix out as [[TN, FP], [FN, TP]]\n",
+    "# (rows = true label, columns = predicted label) with the larger label (1)\n",
+    "# treated as the positive class.\n",
+    "# NOTE(review): confirm that label 1 encodes the positive condition in this dataset.\n",
+    "tn, fp, fn, tp = confusion_matrix_test.ravel()\n",
+    "\n",
+    "total1 = confusion_matrix_test.sum()\n",
+    "# Accuracy: correctly classified samples over all samples.\n",
+    "accuracy1 = (tn + tp) / total1\n",
+    "print ('Accuracy : ', accuracy1)\n",
+    "\n",
+    "# Sensitivity (true positive rate) = TP / (TP + FN).\n",
+    "sensitivity1 = tp / (tp + fn)\n",
+    "print('Sensitivity : ', sensitivity1 )\n",
+    "\n",
+    "# Specificity (true negative rate) = TN / (TN + FP).\n",
+    "specificity1 = tn / (tn + fp)\n",
+    "print('Specificity : ', specificity1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6603d82c",
+   "metadata": {},
+   "source": [
+    "# ROC curve"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "id": "0e2a2694",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import StratifiedKFold\n",
+    "from sklearn.feature_selection import SelectKBest, f_classif\n",
+    "from sklearn.metrics import auc\n",
+    "def roc(X_train,y_train,model,label,n_splits=6):\n",
+    "    \"\"\"Plot stratified k-fold cross-validated ROC curves for a classifier.\n",
+    "\n",
+    "    For every fold a fresh, unfitted copy of `model` is trained on the fold's\n",
+    "    training rows and its ROC curve on the fold's held-out rows is drawn.\n",
+    "    The point-wise mean curve (AUC mean and std in the legend) and a\n",
+    "    +/- 1 standard deviation band are overlaid, then the figure is shown.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    X_train : features; rows are selected by position through `.iloc`.\n",
+    "    y_train : labels; rows are selected by position through `.iloc`.\n",
+    "    model : scikit-learn compatible classifier. It is cloned for every fold,\n",
+    "        so the estimator passed in keeps whatever fit it already had.\n",
+    "    label : str\n",
+    "        Title of the figure.\n",
+    "    n_splits : int, default 6\n",
+    "        Number of stratified folds.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    None. The function only draws and shows a matplotlib figure.\n",
+    "    \"\"\"\n",
+    "    # Local import: `clone` is only needed inside this function.\n",
+    "    from sklearn.base import clone\n",
+    "\n",
+    "    cv = StratifiedKFold(n_splits=n_splits)\n",
+    "    tprs = []\n",
+    "    aucs = []\n",
+    "    # Common FPR grid so the per-fold curves can be averaged point-wise.\n",
+    "    mean_fpr = np.linspace(0, 1, 100)\n",
+    "\n",
+    "    fig, ax = plt.subplots(figsize=(6, 6))\n",
+    "    for fold, (train, test) in enumerate(cv.split(X_train, y_train)):\n",
+    "        # Fit a clone: fitting `model` itself would overwrite the caller's\n",
+    "        # trained estimator with one trained on this fold only.\n",
+    "        classifier = clone(model)\n",
+    "        classifier.fit(X_train.iloc[train], y_train.iloc[train])\n",
+    "        viz = RocCurveDisplay.from_estimator(\n",
+    "            classifier,\n",
+    "            X_train.iloc[test],\n",
+    "            y_train.iloc[test],\n",
+    "            name=f\"ROC fold {fold}\",\n",
+    "            alpha=0.3,\n",
+    "            lw=1,\n",
+    "            ax=ax,\n",
+    "        )\n",
+    "        # Resample this fold's curve onto the common grid and pin it to (0, 0).\n",
+    "        interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)\n",
+    "        interp_tpr[0] = 0.0\n",
+    "        tprs.append(interp_tpr)\n",
+    "        aucs.append(viz.roc_auc)\n",
+    "    ax.plot([0, 1], [0, 1], \"k--\", label=\"chance level (AUC = 0.5)\")\n",
+    "\n",
+    "    # Mean curve across folds, pinned to (1, 1) at the right edge.\n",
+    "    mean_tpr = np.mean(tprs, axis=0)\n",
+    "    mean_tpr[-1] = 1.0\n",
+    "    mean_auc = auc(mean_fpr, mean_tpr)\n",
+    "    std_auc = np.std(aucs)\n",
+    "    ax.plot(\n",
+    "        mean_fpr,\n",
+    "        mean_tpr,\n",
+    "        color=\"b\",\n",
+    "        label=r\"Mean ROC (AUC = %0.2f $\\pm$ %0.2f)\" % (mean_auc, std_auc),\n",
+    "        lw=2,\n",
+    "        alpha=0.8,\n",
+    "    )\n",
+    "\n",
+    "    # Shaded +/- 1 std band around the mean, clipped to the [0, 1] range.\n",
+    "    std_tpr = np.std(tprs, axis=0)\n",
+    "    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)\n",
+    "    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)\n",
+    "    ax.fill_between(\n",
+    "        mean_fpr,\n",
+    "        tprs_lower,\n",
+    "        tprs_upper,\n",
+    "        color=\"grey\",\n",
+    "        alpha=0.2,\n",
+    "        label=r\"$\\pm$ 1 std. dev.\",\n",
+    "    )\n",
+    "\n",
+    "    ax.set(\n",
+    "        xlim=[-0.05, 1.05],\n",
+    "        ylim=[-0.05, 1.05],\n",
+    "        xlabel=\"False Positive Rate\",\n",
+    "        ylabel=\"True Positive Rate\",\n",
+    "        title=label,\n",
+    "    )\n",
+    "    ax.axis(\"square\")\n",
+    "    ax.legend(loc=\"lower right\")\n",
+    "    plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "id": "d4cc8e6d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 600x600 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Cross-validated ROC curves of the tuned model, computed within the training split.\n",
+    "# `model` stays defined at notebook level because the following cell reads it.\n",
+    "model = model_xgb\n",
+    "label = \"ROC curve of training data\"\n",
+    "roc(X_train, y_train, model=model, label=label)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "id": "1199e2e4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 600x600 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# ROC of a model trained on the training split and scored on the untouched\n",
+    "# test split. Calling roc(X_test, y_test, ...) here would instead re-train\n",
+    "# the classifier on folds of the test data, which is not a held-out evaluation.\n",
+    "from sklearn.base import clone\n",
+    "\n",
+    "label=\"ROC curve of testing data\"\n",
+    "# Fit a fresh copy so the curve does not depend on what earlier cells did to `model`.\n",
+    "test_model = clone(model).fit(X_train, y_train)\n",
+    "\n",
+    "fig, ax = plt.subplots(figsize=(6, 6))\n",
+    "RocCurveDisplay.from_estimator(test_model, X_test, y_test, ax=ax)\n",
+    "ax.plot([0, 1], [0, 1], \"k--\", label=\"chance level (AUC = 0.5)\")\n",
+    "ax.set(\n",
+    "    xlim=[-0.05, 1.05],\n",
+    "    ylim=[-0.05, 1.05],\n",
+    "    xlabel=\"False Positive Rate\",\n",
+    "    ylabel=\"True Positive Rate\",\n",
+    "    title=label,\n",
+    ")\n",
+    "ax.axis(\"square\")\n",
+    "ax.legend(loc=\"lower right\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bee03388",
+   "metadata": {},
+   "source": [
+    "# Feature importance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 284,
+   "id": "6688e037",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Rank features by the fitted model's importance scores and keep the\n",
+    "# positions of the 50 highest-scoring ones, highest first.\n",
+    "# NOTE(review): \"quicksort\" is not a stable sort, so the relative order of\n",
+    "# features with equal importance (e.g. several at 0) is not meaningful.\n",
+    "important_feat = model_xgb.feature_importances_\n",
+    "ascending_order = important_feat.argsort(kind=\"quicksort\")\n",
+    "idx = ascending_order[::-1][:50]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 285,
+   "id": "4e6a7ea1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ 66,  65,  84,  94, 140,  32, 169, 137,  23, 212,  10, 166,  13,\n",
+       "        36,  56, 126,  48,  57,  42, 208,  37, 113,  29, 160,  22,  96,\n",
+       "       162, 229, 189, 101, 104, 127, 135,  21,  79,  78,  77,  76,  75,\n",
+       "        74,  73,  72, 202,  71,  69,  68,  67,  64,  63,  62])"
+      ]
+     },
+     "execution_count": 285,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Display the selected positions (at most 50, highest importance first).\n",
+    "idx"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 286,
+   "id": "f2101fe1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Transpose X so that its columns become rows, which lets the positions in\n",
+    "# `idx` be used as row selectors below.\n",
+    "df1 = X.transpose()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 287,
+   "id": "2cbf1166",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Pick the rows of the transposed table at the positions in `idx`, keeping all columns.\n",
+    "top_met = df1.iloc[idx, :]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 288,
+   "id": "2370b2df",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['hsa-miR-18b-5p', 'hsa-miR-18a-5p', 'hsa-miR-21-5p', 'hsa-miR-25-3p',\n",
+       "       'hsa-miR-424-5p', 'hsa-miR-130b-3p', 'hsa-miR-455-3p', 'hsa-miR-378i',\n",
+       "       'hsa-miR-1268a', 'hsa-miR-93-5p', 'hsa-miR-106b-5p', 'hsa-miR-451a',\n",
+       "       'hsa-miR-10b-5p', 'hsa-miR-140-3p', 'hsa-miR-15b-5p', 'hsa-miR-3651',\n",
+       "       'hsa-miR-150-5p', 'hsa-miR-16-2-3p', 'hsa-miR-145-5p', 'hsa-miR-7-5p',\n",
+       "       'hsa-miR-140-5p', 'hsa-miR-3198', 'hsa-miR-1290', 'hsa-miR-4465',\n",
+       "       'hsa-miR-126-3p', 'hsa-miR-26b-5p', 'hsa-miR-4497', 'mr_1',\n",
+       "       'hsa-miR-497-5p', 'hsa-miR-29c-3p', 'hsa-miR-30a-5p', 'hsa-miR-3656',\n",
+       "       'hsa-miR-378a-3p', 'hsa-miR-125b-5p', 'hsa-miR-200c-3p',\n",
+       "       'hsa-miR-200b-3p', 'hsa-miR-19b-3p', 'hsa-miR-19a-3p',\n",
+       "       'hsa-miR-199a-5p', 'hsa-miR-199a-3p', 'hsa-miR-1973', 'hsa-miR-197-5p',\n",
+       "       'hsa-miR-642a-3p', 'hsa-miR-197-3p', 'hsa-miR-193b-3p',\n",
+       "       'hsa-miR-193a-5p', 'hsa-miR-1915-3p', 'hsa-miR-188-5p',\n",
+       "       'hsa-miR-185-5p', 'hsa-miR-181b-5p'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 288,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Row labels of the selected rows, i.e. the names of the top-ranked features.\n",
+    "top_met.index"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}