Skip to content
Snippets Groups Projects
Commit fce3b1c6 authored by aakan96's avatar aakan96
Browse files

Update file mRNA_limma_dataset_xgb_described.ipynb

parent 0c3f471f
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id:c2a396f5 tags:
# Load the libraries
%% Cell type:code id:a3fe3fd6 tags:
``` python
## TRAINING SET : MERGED DATA , TESTING DATA: (GSE38129) : 60 samples (30 control , 30 ESCC), MODEL: XGBoost
```
%% Cell type:code id:f097ad55 tags:
``` python
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import RocCurveDisplay
import matplotlib.pyplot as plt
import numpy as np
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import Lasso
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
import pandas as pd
import numpy as np
np.random.seed(7)
```
%% Cell type:markdown id:73b6611a tags:
# Read the data
%% Cell type:code id:0eeb7a35 tags:
``` python
df_train = pd.read_csv("DS/mRNA_DS_preprocessed_training_data.csv") # read the training data
```
%% Cell type:code id:c04fc6bc tags:
``` python
df_test = pd.read_csv("DS/mRNA_DS_test_data.csv") # read the test data
```
%% Cell type:markdown id:5c60a7a5 tags:
# Data Preprocessing
%% Cell type:code id:683b63ce tags:
``` python
df_train = df_train.T # transpose the data
df_test = df_test.T
```
%% Cell type:code id:6ecc5606 tags:
``` python
#df_test = df_test[:-1]
```
%% Cell type:code id:d9b0088e tags:
``` python
#Transform the input data
df_train.rename(columns=df_train.iloc[0], inplace = True)
df_train.drop(df_train.index[0], inplace = True)
df_train=df_train.reset_index()
```
%% Cell type:code id:2e78017d tags:
``` python
#Transform the input data
df_test.rename(columns=df_test.iloc[-1], inplace = True)
df_test.drop(df_test.index[-1], inplace = True)
df_test=df_test.reset_index()
```
%% Cell type:code id:7168825e tags:
``` python
metadata_test = pd.read_csv("DS/mRNA_DS_metadata_col_test_info.csv") # read the column information (cancer/non cancer)
```
%% Cell type:code id:26c98c3e tags:
``` python
df_test= df_test.merge(metadata_test, left_on="index", right_on= "Unnamed: 0")
```
%% Cell type:code id:b42beaa4 tags:
``` python
df_test['title0'] = df_test['title0'].replace('(?i)mucosa|normal|healthy', 0, regex=True) # replace the names to 0
```
%% Cell type:code id:515cd9fc tags:
``` python
df_test['title0'] = df_test['title0'].replace('(?i)Tumor|Cancer|carcinoma', 1, regex=True)
```
%% Cell type:code id:b4f38f66 tags:
``` python
df_test['title0'].value_counts()
```
%% Output
title0
0 30
1 30
Name: count, dtype: int64
%% Cell type:code id:dcdefec2 tags:
``` python
df_test = df_test[pd.to_numeric(df_test['title0'], errors='coerce').notnull()]#remove all non-numeric data from the column.
```
%% Cell type:code id:ca48568a tags:
``` python
df_test= df_test.drop(['index', 'Unnamed: 0'], axis=1)
```
%% Cell type:code id:d4b7e8b7 tags:
``` python
df_test= df_test.rename(columns={"title0": "index"})
```
%% Cell type:code id:299ca65d tags:
``` python
X_test=df_test.drop("index",axis=1)
y_test=df_test['index']
```
%% Cell type:code id:4c50c510 tags:
``` python
metadata_train = pd.read_csv("DS/mRNA_DS_metadata_col_info.csv")
```
%% Cell type:code id:6730cf89 tags:
``` python
df_train= df_train.merge(metadata_train, left_on="index", right_on= "Unnamed: 0")
```
%% Cell type:code id:7a8ad8ad tags:
``` python
df_train['title0'] = df_train['title0'].replace('(?i)mucosa|normal|healthy', 0, regex=True)
```
%% Cell type:code id:a8cf8643 tags:
``` python
df_train['title0'] = df_train['title0'].replace('(?i)Tumor|Cancer|carcinoma', 1, regex=True)
```
%% Cell type:code id:f5d203aa tags:
``` python
df_train = df_train[pd.to_numeric(df_train['title0'], errors='coerce').notnull()]#remove all non-numeric data from the column.
```
%% Cell type:code id:523bdaa6 tags:
``` python
df_train= df_train.drop(['index', 'Unnamed: 0'], axis=1)
```
%% Cell type:code id:46a6fb36 tags:
``` python
df_train= df_train.rename(columns={"title0": "index"})
```
%% Cell type:code id:e26f88c5 tags:
``` python
df_train['index'].value_counts()
```
%% Output
index
0 111
1 108
Name: count, dtype: int64
%% Cell type:code id:fbaf2507 tags:
``` python
df_train= df_train.apply(pd.to_numeric)
```
%% Cell type:markdown id:1c45ed10 tags:
# t-SNE
%% Cell type:code id:38a993d9 tags:
``` python
from sklearn.manifold import TSNE
# t-SNE
tsne = TSNE(n_components=2,perplexity=40)
embedded_data = tsne.fit_transform(df_train)
# Step 2: Separate data points by class
class_1_indices = np.where(df_train['index'] == 0)[0]
class_2_indices = np.where(df_train['index'] == 1)[0]
class_1_data = embedded_data[class_1_indices]
class_2_data = embedded_data[class_2_indices]
# Step 3: Plot the t-SNE plot with different colors for each class
plt.scatter(class_1_data[:, 0], class_1_data[:, 1], color='red', label='Non-Cancer')
plt.scatter(class_2_data[:, 0], class_2_data[:, 1], color='blue', label='Cancer')
plt.title('t-SNE Plot')
plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')
plt.legend()
plt.show()
```
%% Output
%% Cell type:markdown id:a295f3ad tags:
#### t-distributed stochastic neighbor embedding (t-SNE) is a statistical method for visualizing high-dimensional data by giving each datapoint a location in a two dimensional map. In the plot above we used perplexity of 40 for visualizing the t-SNE, but we didnt do any dimensionality reduction with it. Blue points are Cancer and the red are non-cancer.
%% Cell type:code id:8c0011ea tags:
``` python
X=df_train.drop("index",axis=1)
y=df_train['index']
```
%% Cell type:code id:a10804ed tags:
``` python
X=X.astype('int')
```
%% Cell type:code id:93e28118 tags:
``` python
y=y.astype('int')
```
%% Cell type:markdown id:e9830b6c tags:
# Feature Selection (LASSO)
%% Cell type:code id:1cc528fb tags:
``` python
# LASSO model:
lasso = Lasso(alpha=1)
# fitting the model:
lasso.fit(X, y)
# select all coefficients and the feature names
lasso_coefs = lasso.coef_
feature_names = X.columns
# collect the selected features:
selected_feature_indices = np.nonzero(lasso_coefs)[0]
selected_features = [feature_names[i] for i in selected_feature_indices]
X_selected = X.iloc[:, selected_feature_indices]
```
%% Cell type:code id:8afa29ae tags:
``` python
len(selected_features)
```
%% Output
98
%% Cell type:markdown id:6cee6462 tags:
# Test train split
%% Cell type:code id:cff9bd67 tags:
``` python
X_train = X_selected
y_train = y
```
%% Cell type:code id:129430e6 tags:
``` python
y_test.value_counts(),y_train.value_counts()
```
%% Output
(index
0 30
1 30
Name: count, dtype: int64,
index
0 111
1 108
Name: count, dtype: int64)
%% Cell type:markdown id:1cfe2a06 tags:
# Cross validation
%% Cell type:code id:059bf577 tags:
``` python
model = xgb.XGBClassifier(random_state=42)
# Defining parameter range
param_grid = {
'max_depth': [3,5],
'learning_rate': [0.1 ,0.01, 0.001],
'n_estimators': [100,200],
'gamma': [ 0.1,0.01,0.001],
'subsample': [0.8,1]
}
grid = GridSearchCV(model, param_grid, refit=True, verbose=3)
# Fitting the model for grid search
grid.fit(X_train, y_train)
```
%% Output
Fitting 5 folds for each of 72 candidates, totalling 360 fits
[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.864 total time= 0.3s
[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.2s
[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.953 total time= 0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.886 total time= 0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.953 total time= 0.4s
[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.795 total time= 0.4s
[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.2s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.953 total time= 0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.886 total time= 0.3s
[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.977 total time= 0.3s
[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.953 total time= 0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.886 total time= 0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.977 total time= 0.7s
[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.953 total time= 0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.864 total time= 0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.977 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.932 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.930 total time= 0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.886 total time= 0.2s
[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.953 total time= 0.2s
[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.864 total time= 0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.977 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.932 total time= 0.2s
[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.930 total time= 0.2s
[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.795 total time= 0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.955 total time= 0.2s
[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.909 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.773 total time= 0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.636 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.909 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.860 total time= 0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time= 0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.909 total time= 0.2s
[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.884 total time= 0.2s
[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.864 total time= 0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.932 total time= 0.2s
[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time= 0.3s
[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.909 total time= 0.2s
[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.907 total time= 0.2s
[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.773 total time= 0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.977 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.955 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.909 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.659 total time= 0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.750 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.955 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.932 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.818 total time= 0.2s
[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.955 total time= 0.2s
[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.977 total time= 0.2s
[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.2s
[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.884 total time= 0.2s
[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.841 total time= 0.2s
[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.773 total time= 0.2s
[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.955 total time= 0.2s
[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.909 total time= 0.3s
[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.884 total time= 0.2s
[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.659 total time= 0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.886 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.886 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.659 total time= 0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.614 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.864 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.659 total time= 0.2s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.659 total time= 0.2s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.636 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.886 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.884 total time= 1.4s
[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.659 total time= 2.0s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.864 total time= 0.9s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time= 1.6s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.909 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.659 total time= 0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.614 total time= 0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.955 total time= 0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.864 total time= 0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.930 total time= 0.2s
[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.659 total time= 0.4s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.909 total time= 0.2s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.3s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.909 total time= 0.2s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.884 total time= 0.3s
[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.659 total time= 0.5s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.614 total time= 0.2s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.955 total time= 0.5s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.886 total time= 0.4s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.930 total time= 1.1s
[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.750 total time= 0.2s
[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.2s
[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.930 total time= 0.2s
[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.886 total time= 0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time= 0.2s
[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.977 total time= 0.2s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.953 total time= 0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.727 total time= 0.2s
[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.1s
[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.955 total time= 0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.953 total time= 0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.886 total time= 0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time= 0.2s
[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.977 total time= 0.2s
[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time= 0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.953 total time= 0.2s
[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.909 total time= 0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.2s
[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.977 total time= 0.2s
[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.953 total time= 0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.886 total time= 0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.977 total time= 0.1s
[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.909 total time= 0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.930 total time= 0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.841 total time= 0.3s
[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.1s
[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.909 total time= 0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.953 total time= 0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.886 total time= 0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.977 total time= 0.6s
[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.909 total time= 0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.953 total time= 0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.795 total time= 0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.955 total time= 0.1s
[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.909 total time= 0.2s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.773 total time= 0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.636 total time= 0.1s
[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time= 0.2s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.909 total time= 0.2s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.860 total time= 0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time= 0.2s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.2s
[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.909 total time= 0.2s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.864 total time= 0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.932 total time= 0.1s
[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time= 0.2s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.909 total time= 0.5s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.907 total time= 0.8s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.773 total time= 1.1s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.977 total time= 0.1s
[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.955 total time= 0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.909 total time= 0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.659 total time= 0.4s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.750 total time= 0.7s
[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.955 total time= 1.8s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.932 total time= 3.4s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.884 total time= 3.2s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.818 total time= 1.7s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.955 total time= 1.9s
[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.977 total time= 0.2s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.2s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.884 total time= 0.7s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.841 total time= 0.2s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.773 total time= 0.2s
[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.955 total time= 0.3s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.909 total time= 0.2s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.884 total time= 0.2s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.659 total time= 0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.886 total time= 0.1s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.886 total time= 0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.659 total time= 0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.614 total time= 0.1s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time= 0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.864 total time= 0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.659 total time= 0.2s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time= 0.2s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.2s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time= 0.2s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.884 total time= 0.3s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.659 total time= 0.3s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.636 total time= 0.2s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time= 0.2s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.886 total time= 0.2s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.884 total time= 0.2s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.659 total time= 0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.864 total time= 0.1s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.909 total time= 0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.659 total time= 0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.614 total time= 0.2s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.955 total time= 0.2s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.864 total time= 0.2s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.930 total time= 0.2s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.659 total time= 0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.909 total time= 0.2s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.2s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.909 total time= 0.2s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.884 total time= 0.2s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.659 total time= 0.2s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.614 total time= 0.2s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.955 total time= 0.3s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.886 total time= 0.2s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.930 total time= 0.3s
[CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.750 total time= 0.1s
[CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.1s
[CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.1s
[CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8;, score=0.930 total time= 0.1s
[CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.886 total time= 0.1s
[CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time= 0.1s
[CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=1.000 total time= 0.1s
[CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1;, score=0.953 total time= 0.1s
[CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.727 total time= 0.1s
[CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.1s
[CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.977 total time= 0.2s
[CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.955 total time= 0.1s
[CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8;, score=0.953 total time= 0.2s
[CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.886 total time= 0.2s
[CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time= 0.4s
[CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.977 total time= 0.3s
[CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time= 0.1s
[CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1;, score=0.953 total time= 0.1s
[CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.864 total time= 0.1s
[CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.1s
[CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.977 total time= 0.2s
[CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.1s
[CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8;, score=0.953 total time= 0.1s
[CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.886 total time= 0.5s
[CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.977 total time= 0.5s
[CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.977 total time= 0.6s
[CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.932 total time= 0.4s
[CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1;, score=0.930 total time= 0.9s
[CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.864 total time= 1.2s
[CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time= 1.4s
[CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.977 total time= 0.2s
[CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.2s
[CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.953 total time= 0.1s
[CV 1/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.886 total time= 0.1s
[CV 2/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.977 total time= 0.1s
[CV 3/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.909 total time= 0.1s
[CV 5/5] END gamma=0.001, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1;, score=0.953 total time= 0.1s
[CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.795 total time= 0.1s
[CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.955 total time= 0.2s
[CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.977 total time= 0.1s
[CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.909 total time= 0.1s
[CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.773 total time= 0.1s
[CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.636 total time= 0.1s
[CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time= 0.1s
[CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.909 total time= 0.1s
[CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1;, score=0.860 total time= 0.1s
[CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time= 0.2s
[CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.2s
[CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.977 total time= 0.2s
[CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.909 total time= 0.2s
[CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8;, score=0.884 total time= 0.2s
[CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.864 total time= 0.2s
[CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.932 total time= 0.2s
[CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time= 0.2s
[CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.909 total time= 0.2s
[CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1;, score=0.907 total time= 0.2s
[CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.773 total time= 0.1s
[CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.977 total time= 0.1s
[CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.955 total time= 0.2s
[CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.909 total time= 0.1s
[CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.659 total time= 0.1s
[CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.750 total time= 0.1s
[CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.955 total time= 0.1s
[CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.932 total time= 0.2s
[CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1;, score=0.884 total time= 0.2s
[CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.818 total time= 0.3s
[CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.955 total time= 0.2s
[CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.977 total time= 0.2s
[CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.3s
[CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8;, score=0.884 total time= 0.2s
[CV 1/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.841 total time= 0.2s
[CV 2/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.773 total time= 0.2s
[CV 3/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.955 total time= 0.2s
[CV 4/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.909 total time= 0.2s
[CV 5/5] END gamma=0.001, learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1;, score=0.884 total time= 0.3s
[CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.659 total time= 0.1s
[CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.886 total time= 0.1s
[CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.1s
[CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.886 total time= 0.1s
[CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=0.8;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.659 total time= 0.1s
[CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.614 total time= 0.1s
[CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.955 total time= 0.1s
[CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.864 total time= 0.1s
[CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=100, subsample=1;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.659 total time= 0.2s
[CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time= 0.3s
[CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.3s
[CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.886 total time= 0.2s
[CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=0.8;, score=0.884 total time= 0.2s
[CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.659 total time= 0.2s
[CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.636 total time= 0.2s
[CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.955 total time= 0.3s
[CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.886 total time= 0.2s
[CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=3, n_estimators=200, subsample=1;, score=0.884 total time= 0.2s
[CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.659 total time= 0.1s
[CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.864 total time= 0.1s
[CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.932 total time= 0.1s
[CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.909 total time= 0.1s
[CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=0.8;, score=0.884 total time= 0.1s
[CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.659 total time= 0.1s
[CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.614 total time= 0.1s
[CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.955 total time= 0.1s
[CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.864 total time= 0.2s
[CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=100, subsample=1;, score=0.930 total time= 0.2s
[CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.659 total time= 0.3s
[CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.909 total time= 0.3s
[CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.932 total time= 0.3s
[CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.909 total time= 0.3s
[CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=0.8;, score=0.884 total time= 0.2s
[CV 1/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.659 total time= 0.2s
[CV 2/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.614 total time= 0.2s
[CV 3/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.955 total time= 0.3s
[CV 4/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.886 total time= 0.3s
[CV 5/5] END gamma=0.001, learning_rate=0.001, max_depth=5, n_estimators=200, subsample=1;, score=0.930 total time= 0.2s
GridSearchCV(estimator=XGBClassifier(base_score=None, booster=None,
callbacks=None, colsample_bylevel=None,
colsample_bynode=None,
colsample_bytree=None,
early_stopping_rounds=None,
enable_categorical=False, eval_metric=None,
feature_types=None, gamma=None,
gpu_id=None, grow_policy=None,
importance_type=None,
interaction_constraints=None,
learning_rate=None, max_b...
max_cat_to_onehot=None,
max_delta_step=None, max_depth=None,
max_leaves=None, min_child_weight=None,
missing=nan, monotone_constraints=None,
n_estimators=100, n_jobs=None,
num_parallel_tree=None, predictor=None,
random_state=42, ...),
param_grid={'gamma': [0.1, 0.01, 0.001],
'learning_rate': [0.1, 0.01, 0.001],
'max_depth': [3, 5], 'n_estimators': [100, 200],
'subsample': [0.8, 1]},
verbose=3)
%% Cell type:markdown id:6476d825 tags:
# choose the best hyperparameters
%% Cell type:code id:5d327876 tags:
``` python
# print best parameter after tuning
print(grid.best_params_)
# print how our model looks after hyper-parameter tuning
print(grid.best_estimator_)
```
%% Output
{'gamma': 0.001, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 1}
XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=0.001, gpu_id=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=0.1, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=3, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
n_estimators=100, n_jobs=None, num_parallel_tree=None,
predictor=None, random_state=42, ...)
%% Cell type:code id:29da15ed tags:
``` python
# Select columns in df1 based on columns in df2
X_test = X_test.loc[:, X_train.columns]
```
%% Cell type:code id:d77bf449 tags:
``` python
X_test = X_test.astype('int')
```
%% Cell type:code id:3b2776c0 tags:
``` python
model_xgb = grid.best_estimator_
model_xgb.fit(X_train,y_train)
```
%% Output
XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=0.001, gpu_id=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=0.1, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=3, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
n_estimators=100, n_jobs=None, num_parallel_tree=None,
predictor=None, random_state=42, ...)
%% Cell type:code id:94871ada tags:
``` python
y_proba = model_xgb.fit(X_train, y_train).predict_proba(X_test)[:,1]
```
%% Cell type:markdown id:d1c59b26 tags:
# ROC Curve and Classification report
%% Cell type:code id:fddf8856 tags:
``` python
from sklearn.metrics import roc_curve, auc
# Calculate the false positive rate (FPR), true positive rate (TPR), and thresholds
fpr, tpr, thresholds = roc_curve(y_test, y_proba)
# Calculate the area under the ROC curve (AUC)
roc_auc = auc(fpr, tpr)
# Plot the ROC curve
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='blue', label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='red', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('XGBoost')
plt.legend(loc='lower right')
plt.show()
```
%% Output
%% Cell type:markdown id:2ed2e4bf tags:
### The ROC curve above shows that the XGBoost model has an AUC of 0.97 which indicates that the model preformed good in classification of non cancer and cancer.
%% Cell type:code id:603f38e9 tags:
``` python
# Predict probabilities for the training data
y_train_proba = model_xgb.predict_proba(X_train)[:, 1]
# Calculate the false positive rate (FPR), true positive rate (TPR), and thresholds
fpr_train, tpr_train, thresholds_train = roc_curve(y_train, y_train_proba)
# Calculate the area under the ROC curve (AUC) for training data
roc_auc_train = auc(fpr_train, tpr_train)
# Plot the ROC curve for training data
plt.figure(figsize=(8, 6))
plt.plot(fpr_train, tpr_train, color='blue', label='ROC curve (area = %0.2f)' % roc_auc_train)
plt.plot([0, 1], [0, 1], color='red', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('XGBoost - ROC Curve for Training Data')
plt.legend(loc='lower right')
plt.show()
```
%% Output
%% Cell type:markdown id:942e1a1f tags:
#### In the ROC curve above we can see that the AUC value of 1 for the train set which indicates the model's flawless ability to distinguish between positive and negative classes during training, showcasing excellent performance on the training data.
%% Cell type:code id:169beac5 tags:
``` python
from sklearn.metrics import classification_report, confusion_matrix
grid_predictions = grid.predict(X_test)
print(classification_report(y_test, grid_predictions))
```
%% Output
precision recall f1-score support
0 1.00 0.87 0.93 30
1 0.88 1.00 0.94 30
accuracy 0.93 60
macro avg 0.94 0.93 0.93 60
weighted avg 0.94 0.93 0.93 60
%% Cell type:markdown id:ce1b8ee7 tags:
### The model displayed high precision and recall for both classes, resulting in an overall F1-score of 0.93, indicating its strong performance on the test dataset.
%% Cell type:code id:aeeb0b36 tags:
``` python
#######CONFUSION MATRIX ###########
from sklearn import metrics
y_test_pred_xgb = model_xgb.predict(X_test)
confusion_matrix_test = metrics.confusion_matrix(y_test, y_test_pred_xgb)
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix_test, display_labels = [False, True])
cm_display.plot()
plt.show()
```
%% Output
%% Cell type:markdown id:74e5f71e tags:
### The XGBoost model achieved 26 true positive predictions and 30 true negative predictions, with only 4 false positives and no false negatives, indicating strong overall performance in correctly classifying positive and negative samples.
%% Cell type:code id:ca09e716 tags:
``` python
total1=sum(sum(confusion_matrix_test))
#####from confusion matrix calculate accuracy
accuracy1=(confusion_matrix_test[0,0]+confusion_matrix_test[1,1])/total1
print ('Accuracy : ', accuracy1)
sensitivity1 = confusion_matrix_test[0,0]/(confusion_matrix_test[0,0]+confusion_matrix_test[0,1])
print('Sensitivity : ', sensitivity1 )
specificity1 = confusion_matrix_test[1,1]/(confusion_matrix_test[1,0]+confusion_matrix_test[1,1])
print('Specificity : ', specificity1)
```
%% Output
Accuracy : 0.9333333333333333
Sensitivity : 0.8666666666666667
Specificity : 1.0
%% Cell type:markdown id:a8ae087b tags:
### The XGBoost model demonstrated strong performance with an accuracy of 93.3%, correctly identifying all negative samples with a specificity of 100% and achieving a high sensitivity of 86.7% for positive samples.
%% Cell type:markdown id:4c85b645 tags:
# Feature importance:
%% Cell type:code id:c1095af0 tags:
``` python
# for important features:
important_feat = model_xgb.feature_importances_
#get indices of those important features
idx = important_feat.argsort(kind= "quicksort")
idx= idx[::-1][:50]
```
%% Cell type:code id:ae7e0162 tags:
``` python
df1 = X_selected.T
```
%% Cell type:code id:1d97f818 tags:
``` python
top_met = df1.iloc[idx]
```
%% Cell type:code id:4cd4227b tags:
``` python
top_met.index
```
%% Output
Index(['CRISP3', 'TRIP13', 'RPN1', 'CRNN', 'ALDH9A1', 'ECT2', 'HSPB8', 'MCM2',
'EMP1', 'SIM2', 'RAB11FIP1', 'COL1A1', 'EFNA1', 'IGFBP3', 'KANK1',
'CFD', 'CLIC3', 'ID4', 'CH25H', 'LCN2', 'DHRS1', 'IGF2BP2', 'MMP10',
'ANO1', 'MYH10', 'LAMC2', 'SLK', 'ZBTB16', 'AIM2', 'AQP3', 'COL5A2',
'UCHL1', 'ENTPD6', 'GALE', 'ATP6V1D', 'HSPBAP1', 'GPX3', 'LEPROTL1',
'NT5C2', 'SCNN1A', 'FSCN1', 'ERCC3', 'TMPRSS11D', 'RHCG', 'PTN',
'CRABP2', 'DHRS2', 'GABRP', 'FLG', 'TMF1'],
dtype='object')
%% Cell type:code id:8f6d88bb tags:
``` python
X_selected.columns
```
%% Output
Index(['ACLY', 'ACPP', 'AIM2', 'ALDH9A1', 'ALOX12', 'ANO1', 'AQP3', 'ATP6V1D',
'CCNG2', 'CES2', 'CFD', 'CH25H', 'CLIC3', 'COL1A1', 'COL5A2', 'CRABP2',
'CRISP3', 'CRNN', 'CYP4B1', 'DHRS1', 'DHRS2', 'DUOX1', 'DUSP5', 'ECM1',
'ECT2', 'EFNA1', 'EMP1', 'ENTPD6', 'ERCC3', 'FLG', 'FSCN1', 'GABRP',
'GALE', 'GALNT1', 'GPX3', 'HOPX', 'HSPB8', 'HSPBAP1', 'HSPD1', 'ID4',
'IFI35', 'IGF2BP2', 'IGFBP3', 'IL1RN', 'INPP1', 'KANK1', 'KLK13',
'KRT4', 'LAMC2', 'LCN2', 'LEPROTL1', 'LYPD3', 'MAL', 'MCM2', 'MMP10',
'MUC1', 'MYH10', 'NDRG2', 'NT5C2', 'PCSK5', 'PHLDA1', 'PITX1',
'PPP1R3C', 'PSMB9', 'PTN', 'RAB11FIP1', 'RANBP9', 'RHCG', 'RND3',
'RPN1', 'RUVBL1', 'SCNN1A', 'SERPINB13', 'SERPINB2', 'SIM2', 'SLC2A1',
'SLK', 'SLURP1', 'SPINK5', 'SPRR3', 'SSRP1', 'STK24', 'SYNPO2L',
'TAPBP', 'TFAP2B', 'TGIF1', 'TIAM1', 'TJP1', 'TMF1', 'TMPRSS11D',
'TMPRSS11E', 'TRIP13', 'TSPAN6', 'TST', 'TYMP', 'UCHL1', 'ZBTB16',
'ZNF185'],
dtype='object')
%% Cell type:code id:cf86e1f2 tags:
``` python
from matplotlib import pyplot as plt
```
%% Cell type:code id:6353bd9e tags:
``` python
sorted_idx = model_xgb.feature_importances_.argsort()
plt.barh(top_met.index, model_xgb.feature_importances_[idx])[::-1]
plt.xlabel("Xgboost Feature Importance")
```
%% Output
Text(0.5, 0, 'Xgboost Feature Importance')
%% Cell type:markdown id:29a668b0 tags:
#### The above plot shows the feature importance from low importance to high importance found by the model.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment