Update file mRNA_limma_dataset_xgb_described.ipynb

fce3b1c6 · aakan96 · 0c3f471f · fce3b1c6
Commit fce3b1c6 authored 1 year ago by aakan96
--- a/03_Machine_Learning/mRNA_limma_dataset_xgb_described.ipynb
+++ b/03_Machine_Learning/mRNA_limma_dataset_xgb_described.ipynb
@@ -8,6 +8,16 @@
    "# Load the libraries"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a3fe3fd6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## TRAINING SET : MERGED DATA , TESTING DATA:  (GSE38129) : 60 samples (30 control , 30 ESCC), MODEL: XGBoost"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 3,

 %% Cell type:markdown id:c2a396f5 tags:

 # Load the libraries

+%% Cell type:code id:a3fe3fd6 tags:
+
+``` python
+## TRAINING SET : MERGED DATA , TESTING DATA:  (GSE38129) : 60 samples (30 control , 30 ESCC), MODEL: XGBoost
+```
+
 %% Cell type:code id:f097ad55 tags:

 ``` python
 import warnings
 warnings.filterwarnings('ignore')
 import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import RocCurveDisplay
 import matplotlib.pyplot as plt
 import numpy as np
 from imblearn.over_sampling import SMOTE
 from sklearn.linear_model import Lasso
 import xgboost as xgb
 from sklearn.model_selection import GridSearchCV
 import pandas as pd
 import numpy as np

 np.random.seed(7)
 ```

 %% Cell type:markdown id:73b6611a tags:

 # Read the data

 %% Cell type:code id:0eeb7a35 tags:

 ``` python
 df_train = pd.read_csv("DS/mRNA_DS_preprocessed_training_data.csv") # read the training data
 ```

 %% Cell type:code id:c04fc6bc tags:

 ``` python
 df_test = pd.read_csv("DS/mRNA_DS_test_data.csv") # read the test data
 ```

 %% Cell type:markdown id:5c60a7a5 tags:

 # Data Preprocessing

 %% Cell type:code id:683b63ce tags:

 ``` python
 df_train = df_train.T # transpose the data
 df_test = df_test.T
 ```

 %% Cell type:code id:6ecc5606 tags:

 ``` python
 #df_test = df_test[:-1]
 ```

 %% Cell type:code id:d9b0088e tags:

 ``` python
 #Transform the input data
 df_train.rename(columns=df_train.iloc[0], inplace = True)
 df_train.drop(df_train.index[0], inplace = True)
 df_train=df_train.reset_index()
 ```

 %% Cell type:code id:2e78017d tags:

 ``` python
 #Transform the input data
 df_test.rename(columns=df_test.iloc[-1], inplace = True)
 df_test.drop(df_test.index[-1], inplace = True)
 df_test=df_test.reset_index()
 ```

 %% Cell type:code id:7168825e tags:

 ``` python
 metadata_test = pd.read_csv("DS/mRNA_DS_metadata_col_test_info.csv") # read the column information (cancer/non cancer)
 ```

 %% Cell type:code id:26c98c3e tags:

 ``` python
 df_test= df_test.merge(metadata_test, left_on="index", right_on= "Unnamed: 0")
 ```

 %% Cell type:code id:b42beaa4 tags:

 ``` python
 df_test['title0'] = df_test['title0'].replace('(?i)mucosa|normal|healthy', 0, regex=True) # replace the names to 0

 ```

 %% Cell type:code id:515cd9fc tags:

 ``` python
 df_test['title0'] = df_test['title0'].replace('(?i)Tumor|Cancer|carcinoma', 1, regex=True)
 ```

 %% Cell type:code id:b4f38f66 tags:

 ``` python
 df_test['title0'].value_counts()
 ```

 %% Output

    title0
    0    30
    1    30
    Name: count, dtype: int64

 %% Cell type:code id:dcdefec2 tags:

 ``` python
 df_test = df_test[pd.to_numeric(df_test['title0'], errors='coerce').notnull()]#remove all non-numeric data from the column.
 ```

 %% Cell type:code id:ca48568a tags:

 ``` python
 df_test= df_test.drop(['index', 'Unnamed: 0'], axis=1)
 ```

 %% Cell type:code id:d4b7e8b7 tags:

 ``` python
 df_test= df_test.rename(columns={"title0": "index"})
 ```

 %% Cell type:code id:299ca65d tags:

 ``` python
 X_test=df_test.drop("index",axis=1)
 y_test=df_test['index']
 ```

 %% Cell type:code id:4c50c510 tags:

 ``` python
 metadata_train = pd.read_csv("DS/mRNA_DS_metadata_col_info.csv")
 ```

 %% Cell type:code id:6730cf89 tags:

 ``` python
 df_train= df_train.merge(metadata_train, left_on="index", right_on= "Unnamed: 0")
 ```

 %% Cell type:code id:7a8ad8ad tags:

 ``` python
 df_train['title0'] = df_train['title0'].replace('(?i)mucosa|normal|healthy', 0, regex=True)
 ```

 %% Cell type:code id:a8cf8643 tags:

 ``` python
 df_train['title0'] = df_train['title0'].replace('(?i)Tumor|Cancer|carcinoma', 1, regex=True)
 ```

 %% Cell type:code id:f5d203aa tags:

 ``` python
 df_train = df_train[pd.to_numeric(df_train['title0'], errors='coerce').notnull()]#remove all non-numeric data from the column.
 ```

 %% Cell type:code id:523bdaa6 tags:

 ``` python
 df_train= df_train.drop(['index', 'Unnamed: 0'], axis=1)
 ```

 %% Cell type:code id:46a6fb36 tags:

 ``` python
 df_train= df_train.rename(columns={"title0": "index"})
 ```

 %% Cell type:code id:e26f88c5 tags:

 ``` python
 df_train['index'].value_counts()
 ```

 %% Output

    index
    0    111
    1    108
    Name: count, dtype: int64

 %% Cell type:code id:fbaf2507 tags:

 ``` python
 df_train= df_train.apply(pd.to_numeric)
 ```

 %% Cell type:markdown id:1c45ed10 tags:

 # t-SNE

 %% Cell type:code id:38a993d9 tags:

 ``` python
 from sklearn.manifold import TSNE
 # t-SNE
 tsne = TSNE(n_components=2,perplexity=40)
 embedded_data = tsne.fit_transform(df_train)

 # Step 2: Separate data points by class
 class_1_indices = np.where(df_train['index'] == 0)[0]
 class_2_indices = np.where(df_train['index'] == 1)[0]

 class_1_data = embedded_data[class_1_indices]
 class_2_data = embedded_data[class_2_indices]

 # Step 3: Plot the t-SNE plot with different colors for each class
 plt.scatter(class_1_data[:, 0], class_1_data[:, 1], color='red', label='Non-Cancer')
 plt.scatter(class_2_data[:, 0], class_2_data[:, 1], color='blue', label='Cancer')

 plt.title('t-SNE Plot')
 plt.xlabel('Dimension 1')
 plt.ylabel('Dimension 2')
 plt.legend()
 plt.show()
 ```

 %% Output



 %% Cell type:markdown id:a295f3ad tags:

 #### t-distributed stochastic neighbor embedding (t-SNE) is a statistical method for visualizing high-dimensional data by giving each datapoint a location in a two dimensional map.  In the plot above we used perplexity of 40 for visualizing the t-SNE, but we didnt do any dimensionality reduction with it. Blue points are Cancer and the red are non-cancer.

 %% Cell type:code id:8c0011ea tags:

 ``` python
 X=df_train.drop("index",axis=1)
 y=df_train['index']
 ```

 %% Cell type:code id:a10804ed tags:

 ``` python
 X=X.astype('int')
 ```

 %% Cell type:code id:93e28118 tags:

 ``` python
 y=y.astype('int')
 ```

 %% Cell type:markdown id:e9830b6c tags:

 # Feature Selection (LASSO)

 %% Cell type:code id:1cc528fb tags:

 ``` python
 # LASSO model:
 lasso = Lasso(alpha=1)
 # fitting the model:
 lasso.fit(X, y)
 # select all coefficients and the feature names
 lasso_coefs = lasso.coef_
 feature_names = X.columns

 # collect the selected features:
 selected_feature_indices = np.nonzero(lasso_coefs)[0]
 selected_features = [feature_names[i] for i in selected_feature_indices]
 X_selected = X.iloc[:, selected_feature_indices]
 ```

 %% Cell type:code id:8afa29ae tags:

 ``` python
 len(selected_features)
 ```

 %% Output

    98

 %% Cell type:markdown id:6cee6462 tags:

 # Test train split

 %% Cell type:code id:cff9bd67 tags:

 ``` python
 X_train = X_selected
 y_train = y
 ```

 %% Cell type:code id:129430e6 tags:

 ``` python
 y_test.value_counts(),y_train.value_counts()
 ```

 %% Output

    (index
     0    30
     1    30
     Name: count, dtype: int64,
     index
     0    111
     1    108
     Name: count, dtype: int64)

 %% Cell type:markdown id:1cfe2a06 tags:

 # Cross validation

 %% Cell type:code id:059bf577 tags:

 ``` python
 model = xgb.XGBClassifier(random_state=42)

 # Defining parameter range
 param_grid = {
    'max_depth': [3,5],
    'learning_rate': [0.1 ,0.01, 0.001],
    'n_estimators': [100,200],
    'gamma': [ 0.1,0.01,0.001],
    'subsample': [0.8,1]
 }

 grid = GridSearchCV(model, param_grid, refit=True, verbose=3)

 # Fitting the model for grid search
 grid.fit(X_train, y_train)
 ```

 %% Output

    Fitting 5 folds for each of 72 candidates, totalling 360 fits
    [CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.864 total time=   0.3s
    [CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.2s
    [CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.953 total time=   0.1s
    [CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.886 total time=   0.1s
    [CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.953 total time=   0.4s
    [CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.795 total time=   0.4s
    [CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.2s
    [CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.953 total time=   0.1s
    [CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.886 total time=   0.3s
    [CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.977 total time=   0.3s
    [CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.953 total time=   0.1s
    [CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.886 total time=   0.1s
    [CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.977 total time=   0.7s
    [CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.953 total time=   0.1s
    [CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.864 total time=   0.1s
    [CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.977 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.932 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.930 total time=   0.1s
    [CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.886 total time=   0.2s
    [CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.953 total time=   0.2s
    [CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.864 total time=   0.1s
    [CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.977 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.932 total time=   0.2s
    [CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.930 total time=   0.2s
    [CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.795 total time=   0.1s
    [CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.955 total time=   0.2s
    [CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.909 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.773 total time=   0.1s
    [CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.636 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.909 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.860 total time=   0.1s
    [CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time=   0.1s
    [CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.909 total time=   0.2s
    [CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.884 total time=   0.2s
    [CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.864 total time=   0.1s
    [CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.932 total time=   0.2s
    [CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time=   0.3s
    [CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.909 total time=   0.2s
    [CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.907 total time=   0.2s
    [CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.773 total time=   0.1s
    [CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.955 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.909 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.659 total time=   0.1s
    [CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.750 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.955 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.932 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.884 total time=   0.1s

    [CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.818 total time=   0.2s
    [CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.955 total time=   0.2s
    [CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.977 total time=   0.2s
    [CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.2s
    [CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.884 total time=   0.2s
    [CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.841 total time=   0.2s
    [CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.773 total time=   0.2s
    [CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.955 total time=   0.2s
    [CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.909 total time=   0.3s
    [CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.884 total time=   0.2s
    [CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.659 total time=   0.1s
    [CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.886 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.886 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.659 total time=   0.1s
    [CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.614 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.864 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.659 total time=   0.2s
    [CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.659 total time=   0.2s
    [CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.636 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.886 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.884 total time=   1.4s
    [CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.659 total time=   2.0s
    [CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.864 total time=   0.9s
    [CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time=   1.6s
    [CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.909 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.659 total time=   0.1s
    [CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.614 total time=   0.1s
    [CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.955 total time=   0.1s
    [CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.864 total time=   0.1s
    [CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.930 total time=   0.2s
    [CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.659 total time=   0.4s
    [CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.909 total time=   0.2s
    [CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.3s
    [CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.909 total time=   0.2s
    [CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.884 total time=   0.3s
    [CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.659 total time=   0.5s
    [CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.614 total time=   0.2s
    [CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.955 total time=   0.5s
    [CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.886 total time=   0.4s
    [CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.930 total time=   1.1s
    [CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.750 total time=   0.2s
    [CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.2s
    [CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.930 total time=   0.2s
    [CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.886 total time=   0.1s
    [CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time=   0.2s
    [CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.977 total time=   0.2s
    [CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.953 total time=   0.1s
    [CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.727 total time=   0.2s
    [CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.955 total time=   0.1s
    [CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.953 total time=   0.1s
    [CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.886 total time=   0.1s
    [CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time=   0.2s
    [CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.977 total time=   0.2s
    [CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time=   0.1s

    [CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.953 total time=   0.2s
    [CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.909 total time=   0.1s
    [CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.2s
    [CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.977 total time=   0.2s
    [CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.953 total time=   0.1s
    [CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.886 total time=   0.1s
    [CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.977 total time=   0.1s
    [CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.909 total time=   0.1s
    [CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.930 total time=   0.1s
    [CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.841 total time=   0.3s
    [CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.909 total time=   0.1s
    [CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.953 total time=   0.1s
    [CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.886 total time=   0.1s
    [CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.977 total time=   0.6s
    [CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.909 total time=   0.1s
    [CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.953 total time=   0.1s
    [CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.795 total time=   0.1s
    [CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.955 total time=   0.1s
    [CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.909 total time=   0.2s
    [CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.773 total time=   0.1s
    [CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.636 total time=   0.1s
    [CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time=   0.2s
    [CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.909 total time=   0.2s
    [CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.860 total time=   0.1s
    [CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time=   0.2s
    [CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.2s
    [CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.909 total time=   0.2s
    [CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.864 total time=   0.1s
    [CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.932 total time=   0.1s
    [CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time=   0.2s
    [CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.909 total time=   0.5s
    [CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.907 total time=   0.8s
    [CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.773 total time=   1.1s
    [CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.955 total time=   0.1s
    [CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.909 total time=   0.1s
    [CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.659 total time=   0.4s
    [CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.750 total time=   0.7s
    [CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.955 total time=   1.8s
    [CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.932 total time=   3.4s
    [CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.884 total time=   3.2s
    [CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.818 total time=   1.7s
    [CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.955 total time=   1.9s
    [CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.977 total time=   0.2s
    [CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.2s
    [CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.884 total time=   0.7s
    [CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.841 total time=   0.2s
    [CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.773 total time=   0.2s
    [CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.955 total time=   0.3s
    [CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.909 total time=   0.2s
    [CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.884 total time=   0.2s
    [CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.659 total time=   0.1s
    [CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.886 total time=   0.1s
    [CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.886 total time=   0.1s
    [CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.659 total time=   0.1s
    [CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.614 total time=   0.1s
    [CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time=   0.1s

    [CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.864 total time=   0.1s
    [CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.659 total time=   0.2s
    [CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time=   0.2s
    [CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.2s
    [CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time=   0.2s
    [CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.884 total time=   0.3s
    [CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.659 total time=   0.3s
    [CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.636 total time=   0.2s
    [CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time=   0.2s
    [CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.886 total time=   0.2s
    [CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.884 total time=   0.2s
    [CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.659 total time=   0.1s
    [CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.864 total time=   0.1s
    [CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.909 total time=   0.1s
    [CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.659 total time=   0.1s
    [CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.614 total time=   0.2s
    [CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.955 total time=   0.2s
    [CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.864 total time=   0.2s
    [CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.930 total time=   0.2s
    [CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.659 total time=   0.1s
    [CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.909 total time=   0.2s
    [CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.2s
    [CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.909 total time=   0.2s
    [CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.884 total time=   0.2s
    [CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.659 total time=   0.2s
    [CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.614 total time=   0.2s
    [CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.955 total time=   0.3s
    [CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.886 total time=   0.2s
    [CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.930 total time=   0.3s
    [CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.750 total time=   0.1s
    [CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.930 total time=   0.1s
    [CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.886 total time=   0.1s
    [CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time=   0.1s
    [CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=1.000 total time=   0.1s
    [CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.953 total time=   0.1s
    [CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.727 total time=   0.1s
    [CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.977 total time=   0.2s
    [CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.955 total time=   0.1s
    [CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.953 total time=   0.2s
    [CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.886 total time=   0.2s
    [CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time=   0.4s
    [CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.977 total time=   0.3s
    [CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time=   0.1s
    [CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.953 total time=   0.1s
    [CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.864 total time=   0.1s
    [CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.977 total time=   0.2s
    [CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.953 total time=   0.1s
    [CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.886 total time=   0.5s
    [CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.977 total time=   0.5s
    [CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.977 total time=   0.6s
    [CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.932 total time=   0.4s
    [CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.930 total time=   0.9s
    [CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.864 total time=   1.2s
    [CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time=   1.4s
    [CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.977 total time=   0.2s
    [CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.2s
    [CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.953 total time=   0.1s

    [CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.886 total time=   0.1s
    [CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.977 total time=   0.1s
    [CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.909 total time=   0.1s
    [CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.953 total time=   0.1s
    [CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.795 total time=   0.1s
    [CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.955 total time=   0.2s
    [CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.909 total time=   0.1s
    [CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.773 total time=   0.1s
    [CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.636 total time=   0.1s
    [CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time=   0.1s
    [CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.909 total time=   0.1s
    [CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.860 total time=   0.1s
    [CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time=   0.2s
    [CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.2s
    [CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.977 total time=   0.2s
    [CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.909 total time=   0.2s
    [CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.884 total time=   0.2s
    [CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.864 total time=   0.2s
    [CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.932 total time=   0.2s
    [CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time=   0.2s
    [CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.909 total time=   0.2s
    [CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.907 total time=   0.2s
    [CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.773 total time=   0.1s
    [CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.977 total time=   0.1s
    [CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.955 total time=   0.2s
    [CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.909 total time=   0.1s
    [CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.659 total time=   0.1s
    [CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.750 total time=   0.1s
    [CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.955 total time=   0.1s
    [CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.932 total time=   0.2s
    [CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.884 total time=   0.2s
    [CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.818 total time=   0.3s
    [CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.955 total time=   0.2s
    [CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.977 total time=   0.2s
    [CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.3s
    [CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.884 total time=   0.2s
    [CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.841 total time=   0.2s
    [CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.773 total time=   0.2s
    [CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.955 total time=   0.2s
    [CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.909 total time=   0.2s
    [CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.884 total time=   0.3s
    [CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.659 total time=   0.1s
    [CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.886 total time=   0.1s
    [CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.886 total time=   0.1s
    [CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.659 total time=   0.1s
    [CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.614 total time=   0.1s
    [CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time=   0.1s
    [CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.864 total time=   0.1s
    [CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.659 total time=   0.2s
    [CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time=   0.3s
    [CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.3s
    [CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time=   0.2s
    [CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.884 total time=   0.2s
    [CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.659 total time=   0.2s
    [CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.636 total time=   0.2s
    [CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time=   0.3s
    [CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.886 total time=   0.2s
    [CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.884 total time=   0.2s
    [CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.659 total time=   0.1s
    [CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.864 total time=   0.1s

    [CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time=   0.1s
    [CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.909 total time=   0.1s
    [CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.884 total time=   0.1s
    [CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.659 total time=   0.1s
    [CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.614 total time=   0.1s
    [CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.955 total time=   0.1s
    [CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.864 total time=   0.2s
    [CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.930 total time=   0.2s
    [CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.659 total time=   0.3s
    [CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.909 total time=   0.3s
    [CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time=   0.3s
    [CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.909 total time=   0.3s
    [CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.884 total time=   0.2s
    [CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.659 total time=   0.2s
    [CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.614 total time=   0.2s
    [CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.955 total time=   0.3s
    [CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.886 total time=   0.3s
    [CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.930 total time=   0.2s

    GridSearchCV(estimator=XGBClassifier(base_score=None, booster=None,
                                         callbacks=None, colsample_bylevel=None,
                                         colsample_bynode=None,
                                         colsample_bytree=None,
                                         early_stopping_rounds=None,
                                         enable_categorical=False, eval_metric=None,
                                         feature_types=None, gamma=None,
                                         gpu_id=None, grow_policy=None,
                                         importance_type=None,
                                         interaction_constraints=None,
                                         learning_rate=None, max_b...
                                         max_cat_to_onehot=None,
                                         max_delta_step=None, max_depth=None,
                                         max_leaves=None, min_child_weight=None,
                                         missing=nan, monotone_constraints=None,
                                         n_estimators=100, n_jobs=None,
                                         num_parallel_tree=None, predictor=None,
                                         random_state=42, ...),
                 param_grid={'gamma': [0.1, 0.01, 0.001],
                             'learning_rate': [0.1, 0.01, 0.001],
                             'max_depth': [3, 5], 'n_estimators': [100, 200],
                             'subsample': [0.8, 1]},
                 verbose=3)

 %% Cell type:markdown id:6476d825 tags:

 # choose the best hyperparameters

 %% Cell type:code id:5d327876 tags:

 ``` python
 # print best parameter after tuning
 print(grid.best_params_)

 # print how our model looks after hyper-parameter tuning
 print(grid.best_estimator_)
 ```

 %% Output

    {'gamma': 0.001, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 1}
    XGBClassifier(base_score=None, booster=None, callbacks=None,
                  colsample_bylevel=None, colsample_bynode=None,
                  colsample_bytree=None, early_stopping_rounds=None,
                  enable_categorical=False, eval_metric=None, feature_types=None,
                  gamma=0.001, gpu_id=None, grow_policy=None, importance_type=None,
                  interaction_constraints=None, learning_rate=0.1, max_bin=None,
                  max_cat_threshold=None, max_cat_to_onehot=None,
                  max_delta_step=None, max_depth=3, max_leaves=None,
                  min_child_weight=None, missing=nan, monotone_constraints=None,
                  n_estimators=100, n_jobs=None, num_parallel_tree=None,
                  predictor=None, random_state=42, ...)

 %% Cell type:code id:29da15ed tags:

 ``` python
 # Select columns in df1 based on columns in df2
 X_test = X_test.loc[:, X_train.columns]
 ```

 %% Cell type:code id:d77bf449 tags:

 ``` python
 X_test = X_test.astype('int')
 ```

 %% Cell type:code id:3b2776c0 tags:

 ``` python
 model_xgb = grid.best_estimator_
 model_xgb.fit(X_train,y_train)
 ```

 %% Output

    XGBClassifier(base_score=None, booster=None, callbacks=None,
                  colsample_bylevel=None, colsample_bynode=None,
                  colsample_bytree=None, early_stopping_rounds=None,
                  enable_categorical=False, eval_metric=None, feature_types=None,
                  gamma=0.001, gpu_id=None, grow_policy=None, importance_type=None,
                  interaction_constraints=None, learning_rate=0.1, max_bin=None,
                  max_cat_threshold=None, max_cat_to_onehot=None,
                  max_delta_step=None, max_depth=3, max_leaves=None,
                  min_child_weight=None, missing=nan, monotone_constraints=None,
                  n_estimators=100, n_jobs=None, num_parallel_tree=None,
                  predictor=None, random_state=42, ...)

 %% Cell type:code id:94871ada tags:

 ``` python
 y_proba = model_xgb.fit(X_train, y_train).predict_proba(X_test)[:,1]
 ```

 %% Cell type:markdown id:d1c59b26 tags:

 # ROC Curve and Classification report

 %% Cell type:code id:fddf8856 tags:

 ``` python
 from sklearn.metrics import roc_curve, auc
 # Calculate the false positive rate (FPR), true positive rate (TPR), and thresholds
 fpr, tpr, thresholds = roc_curve(y_test, y_proba)

 # Calculate the area under the ROC curve (AUC)
 roc_auc = auc(fpr, tpr)

 # Plot the ROC curve
 plt.figure(figsize=(8, 6))
 plt.plot(fpr, tpr, color='blue', label='ROC curve (area = %0.2f)' % roc_auc)
 plt.plot([0, 1], [0, 1], color='red', linestyle='--')
 plt.xlim([0.0, 1.0])
 plt.ylim([0.0, 1.05])
 plt.xlabel('False Positive Rate')
 plt.ylabel('True Positive Rate')
 plt.title('XGBoost')
 plt.legend(loc='lower right')
 plt.show()
 ```

 %% Output



 %% Cell type:markdown id:2ed2e4bf tags:

 ### The ROC curve above shows that the XGBoost model has an AUC of 0.97 which indicates that the model preformed good in classification of non cancer and cancer.

 %% Cell type:code id:603f38e9 tags:

 ``` python
 # Predict probabilities for the training data
 y_train_proba = model_xgb.predict_proba(X_train)[:, 1]

 # Calculate the false positive rate (FPR), true positive rate (TPR), and thresholds
 fpr_train, tpr_train, thresholds_train = roc_curve(y_train, y_train_proba)

 # Calculate the area under the ROC curve (AUC) for training data
 roc_auc_train = auc(fpr_train, tpr_train)

 # Plot the ROC curve for training data
 plt.figure(figsize=(8, 6))
 plt.plot(fpr_train, tpr_train, color='blue', label='ROC curve (area = %0.2f)' % roc_auc_train)
 plt.plot([0, 1], [0, 1], color='red', linestyle='--')
 plt.xlim([0.0, 1.0])
 plt.ylim([0.0, 1.05])
 plt.xlabel('False Positive Rate')
 plt.ylabel('True Positive Rate')
 plt.title('XGBoost - ROC Curve for Training Data')
 plt.legend(loc='lower right')
 plt.show()
 ```

 %% Output



 %% Cell type:markdown id:942e1a1f tags:

 #### In the ROC curve above we can see that the AUC value of 1 for the train set which indicates the model's flawless ability to distinguish between positive and negative classes during training, showcasing excellent performance on the training data.

 %% Cell type:code id:169beac5 tags:

 ``` python
 from sklearn.metrics import classification_report, confusion_matrix
 grid_predictions = grid.predict(X_test)
 print(classification_report(y_test, grid_predictions))
 ```

 %% Output

                  precision    recall  f1-score   support
    
               0       1.00      0.87      0.93        30
               1       0.88      1.00      0.94        30
    
        accuracy                           0.93        60
       macro avg       0.94      0.93      0.93        60
    weighted avg       0.94      0.93      0.93        60
    

 %% Cell type:markdown id:ce1b8ee7 tags:

 ### The model displayed high precision and recall for both classes, resulting in an overall F1-score of 0.93, indicating its strong performance on the test dataset.

 %% Cell type:code id:aeeb0b36 tags:

 ``` python
 #######CONFUSION MATRIX ###########
 from sklearn import metrics
 y_test_pred_xgb = model_xgb.predict(X_test)
 confusion_matrix_test = metrics.confusion_matrix(y_test, y_test_pred_xgb)
 cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix_test, display_labels = [False, True])
 cm_display.plot()
 plt.show()
 ```

 %% Output



 %% Cell type:markdown id:74e5f71e tags:

 ### The XGBoost model achieved 26 true positive predictions and 30 true negative predictions, with only 4 false positives and no false negatives, indicating strong overall performance in correctly classifying positive and negative samples.

 %% Cell type:code id:ca09e716 tags:

 ``` python
 total1=sum(sum(confusion_matrix_test))
 #####from confusion matrix calculate accuracy
 accuracy1=(confusion_matrix_test[0,0]+confusion_matrix_test[1,1])/total1
 print ('Accuracy : ', accuracy1)

 sensitivity1 = confusion_matrix_test[0,0]/(confusion_matrix_test[0,0]+confusion_matrix_test[0,1])
 print('Sensitivity : ', sensitivity1 )

 specificity1 = confusion_matrix_test[1,1]/(confusion_matrix_test[1,0]+confusion_matrix_test[1,1])
 print('Specificity : ', specificity1)
 ```

 %% Output

    Accuracy :  0.9333333333333333
    Sensitivity :  0.8666666666666667
    Specificity :  1.0

 %% Cell type:markdown id:a8ae087b tags:

 ### The XGBoost model demonstrated strong performance with an accuracy of 93.3%, correctly identifying all negative samples with a specificity of 100% and achieving a high sensitivity of 86.7% for positive samples.

 %% Cell type:markdown id:4c85b645 tags:

 # Feature importance:

 %% Cell type:code id:c1095af0 tags:

 ``` python
 # for important features:
 important_feat = model_xgb.feature_importances_
 #get indices of those important features
 idx = important_feat.argsort(kind= "quicksort")
 idx= idx[::-1][:50]
 ```

 %% Cell type:code id:ae7e0162 tags:

 ``` python
 df1 = X_selected.T
 ```

 %% Cell type:code id:1d97f818 tags:

 ``` python
 top_met = df1.iloc[idx]
 ```

 %% Cell type:code id:4cd4227b tags:

 ``` python
 top_met.index
 ```

 %% Output

    Index(['CRISP3', 'TRIP13', 'RPN1', 'CRNN', 'ALDH9A1', 'ECT2', 'HSPB8', 'MCM2',
           'EMP1', 'SIM2', 'RAB11FIP1', 'COL1A1', 'EFNA1', 'IGFBP3', 'KANK1',
           'CFD', 'CLIC3', 'ID4', 'CH25H', 'LCN2', 'DHRS1', 'IGF2BP2', 'MMP10',
           'ANO1', 'MYH10', 'LAMC2', 'SLK', 'ZBTB16', 'AIM2', 'AQP3', 'COL5A2',
           'UCHL1', 'ENTPD6', 'GALE', 'ATP6V1D', 'HSPBAP1', 'GPX3', 'LEPROTL1',
           'NT5C2', 'SCNN1A', 'FSCN1', 'ERCC3', 'TMPRSS11D', 'RHCG', 'PTN',
           'CRABP2', 'DHRS2', 'GABRP', 'FLG', 'TMF1'],
          dtype='object')

 %% Cell type:code id:8f6d88bb tags:

 ``` python
 X_selected.columns
 ```

 %% Output

    Index(['ACLY', 'ACPP', 'AIM2', 'ALDH9A1', 'ALOX12', 'ANO1', 'AQP3', 'ATP6V1D',
           'CCNG2', 'CES2', 'CFD', 'CH25H', 'CLIC3', 'COL1A1', 'COL5A2', 'CRABP2',
           'CRISP3', 'CRNN', 'CYP4B1', 'DHRS1', 'DHRS2', 'DUOX1', 'DUSP5', 'ECM1',
           'ECT2', 'EFNA1', 'EMP1', 'ENTPD6', 'ERCC3', 'FLG', 'FSCN1', 'GABRP',
           'GALE', 'GALNT1', 'GPX3', 'HOPX', 'HSPB8', 'HSPBAP1', 'HSPD1', 'ID4',
           'IFI35', 'IGF2BP2', 'IGFBP3', 'IL1RN', 'INPP1', 'KANK1', 'KLK13',
           'KRT4', 'LAMC2', 'LCN2', 'LEPROTL1', 'LYPD3', 'MAL', 'MCM2', 'MMP10',
           'MUC1', 'MYH10', 'NDRG2', 'NT5C2', 'PCSK5', 'PHLDA1', 'PITX1',
           'PPP1R3C', 'PSMB9', 'PTN', 'RAB11FIP1', 'RANBP9', 'RHCG', 'RND3',
           'RPN1', 'RUVBL1', 'SCNN1A', 'SERPINB13', 'SERPINB2', 'SIM2', 'SLC2A1',
           'SLK', 'SLURP1', 'SPINK5', 'SPRR3', 'SSRP1', 'STK24', 'SYNPO2L',
           'TAPBP', 'TFAP2B', 'TGIF1', 'TIAM1', 'TJP1', 'TMF1', 'TMPRSS11D',
           'TMPRSS11E', 'TRIP13', 'TSPAN6', 'TST', 'TYMP', 'UCHL1', 'ZBTB16',
           'ZNF185'],
          dtype='object')

 %% Cell type:code id:cf86e1f2 tags:

 ``` python
 from matplotlib import pyplot as plt
 ```

 %% Cell type:code id:6353bd9e tags:

 ``` python
 sorted_idx = model_xgb.feature_importances_.argsort()
 plt.barh(top_met.index, model_xgb.feature_importances_[idx])[::-1]
 plt.xlabel("Xgboost Feature Importance")
 ```

 %% Output

    Text(0.5, 0, 'Xgboost Feature Importance')



 %% Cell type:markdown id:29a668b0 tags:

 #### The above plot shows the feature importance from low importance to high importance found by the model.