diff --git a/Machine Learning/DS_miRNA_limma_dataset_xgb_final-F.ipynb b/Machine Learning/DS_miRNA_limma_dataset_xgb_final-F.ipynb index e19cab4d08e4dff8eb9df8eedce78d1e53ed6add..45657d71aa1283f784d0dfce0591ca031d469ce8 100644 --- a/Machine Learning/DS_miRNA_limma_dataset_xgb_final-F.ipynb +++ b/Machine Learning/DS_miRNA_limma_dataset_xgb_final-F.ipynb @@ -91,391 +91,6 @@ "df=df.reset_index()" ] }, - { - "cell_type": "code", - "execution_count": 43, - "id": "1647a959", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>index</th>\n", - " <th>dmr_3</th>\n", - " <th>dmr_31a</th>\n", - " <th>dmr_6</th>\n", - " <th>ebv-miR-BART13</th>\n", - " <th>hsa-let-7c</th>\n", - " <th>hsa-let-7d-5p</th>\n", - " <th>hsa-let-7i-5p</th>\n", - " <th>hsa-miR-100-5p</th>\n", - " <th>hsa-miR-101-3p</th>\n", - " <th>...</th>\n", - " <th>hsv2-miR-H24</th>\n", - " <th>hsv2-miR-H25</th>\n", - " <th>hsv2-miR-H6</th>\n", - " <th>hur_1</th>\n", - " <th>hur_2</th>\n", - " <th>hur_4</th>\n", - " <th>hur_5</th>\n", - " <th>hur_6</th>\n", - " <th>miRNABrightCorner30</th>\n", - " <th>mr_1</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>GSM1069774</td>\n", - " <td>0.732675</td>\n", - " <td>-0.242559</td>\n", - " <td>0.577801</td>\n", - " <td>-4.469532</td>\n", - " <td>1.195899</td>\n", - " <td>-0.334742</td>\n", - " <td>0.89199</td>\n", - " <td>-2.089223</td>\n", - " <td>-2.757097</td>\n", - " <td>...</td>\n", - " <td>-3.956004</td>\n", - " <td>-3.936689</td>\n", - " <td>-4.099346</td>\n", - " <td>6.98856</td>\n", - " <td>7.041557</td>\n", - " <td>3.822267</td>\n", - " <td>-2.268209</td>\n", - " <td>5.114399</td>\n", - " <td>2.017444</td>\n", - " <td>1.640437</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>GSM1069775</td>\n", - " <td>0.249772</td>\n", - " <td>-0.655514</td>\n", - " <td>0.104933</td>\n", - " <td>-5.209572</td>\n", - " <td>0.498366</td>\n", - " <td>-0.194772</td>\n", - " <td>0.637863</td>\n", - " <td>-2.357572</td>\n", - " <td>-2.196884</td>\n", - " <td>...</td>\n", - " <td>-4.334103</td>\n", - " <td>-4.561624</td>\n", - " <td>-4.719714</td>\n", - " <td>6.774479</td>\n", - " <td>6.862654</td>\n", - " <td>3.529789</td>\n", - " <td>-2.656642</td>\n", - " <td>4.327117</td>\n", - " <td>2.022346</td>\n", - " <td>0.79426</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>GSM1069776</td>\n", - " <td>0.400779</td>\n", - " <td>-0.597444</td>\n", - " <td>0.232702</td>\n", - " <td>-4.952808</td>\n", - " <td>1.081166</td>\n", - " <td>0.249982</td>\n", - " <td>1.45018</td>\n", - " <td>-1.138559</td>\n", - " <td>-1.802774</td>\n", - " <td>...</td>\n", - " <td>-4.550077</td>\n", - " <td>-4.40729</td>\n", - " <td>-4.621278</td>\n", - " <td>6.808404</td>\n", - " <td>6.75867</td>\n", - " <td>3.496675</td>\n", - " <td>-2.676555</td>\n", - " <td>4.616284</td>\n", - " <td>1.498011</td>\n", - " <td>1.584544</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>GSM1069777</td>\n", - " <td>0.380263</td>\n", - " <td>-0.900491</td>\n", - " <td>0.243207</td>\n", - " <td>-4.892073</td>\n", - " <td>-0.023958</td>\n", - " <td>-0.980435</td>\n", - " <td>1.071857</td>\n", - " <td>-2.077406</td>\n", - " <td>-2.11406</td>\n", - " <td>...</td>\n", - " <td>-4.018911</td>\n", - " <td>-4.203106</td>\n", - " <td>-3.938707</td>\n", - " <td>6.524773</td>\n", - " <td>6.497959</td>\n", - " <td>3.541502</td>\n", - " <td>-3.073553</td>\n", - " <td>4.581648</td>\n", - " <td>0.789822</td>\n", - " <td>1.255367</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>GSM1069778</td>\n", - " <td>0.422207</td>\n", - " <td>-0.414831</td>\n", - " <td>-0.000781</td>\n", - " <td>-5.139127</td>\n", - " <td>1.077485</td>\n", - " <td>-0.684875</td>\n", - " <td>0.724751</td>\n", - " <td>-0.689096</td>\n", - " <td>-1.182558</td>\n", - " <td>...</td>\n", - " <td>-3.690971</td>\n", - " <td>-4.332452</td>\n", - " <td>-4.178727</td>\n", - " <td>6.562608</td>\n", - " <td>6.529399</td>\n", - " <td>3.305132</td>\n", - " <td>-2.964948</td>\n", - " <td>4.487481</td>\n", - " <td>1.219583</td>\n", - " <td>0.951615</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>233</th>\n", - " <td>GSM1070007</td>\n", - " <td>0.98797</td>\n", - " <td>-0.118186</td>\n", - " <td>0.750199</td>\n", - " <td>-4.572984</td>\n", - " <td>0.696251</td>\n", - " <td>-1.089669</td>\n", - " <td>0.826</td>\n", - " <td>-1.604393</td>\n", - " <td>-2.87334</td>\n", - " <td>...</td>\n", - " <td>-2.163581</td>\n", - " <td>-2.15805</td>\n", - " <td>-2.302647</td>\n", - " <td>7.093144</td>\n", - " <td>7.150126</td>\n", - " <td>3.899704</td>\n", - " <td>-2.954284</td>\n", - " <td>5.505105</td>\n", - " <td>2.457963</td>\n", - " <td>2.142301</td>\n", - " </tr>\n", - " <tr>\n", - " <th>234</th>\n", - " <td>GSM1070008</td>\n", - " <td>-0.194781</td>\n", - " <td>-0.710519</td>\n", - " <td>-0.700226</td>\n", - " <td>-5.651293</td>\n", - " <td>0.742722</td>\n", - " <td>-0.964527</td>\n", - " <td>0.570816</td>\n", - " <td>-1.046029</td>\n", - " <td>-1.840615</td>\n", - " <td>...</td>\n", - " <td>-4.507365</td>\n", - " <td>-4.23831</td>\n", - " <td>-4.63219</td>\n", - " <td>6.18658</td>\n", - " <td>6.232722</td>\n", - " <td>2.788619</td>\n", - " <td>-3.103706</td>\n", - " <td>4.340513</td>\n", - " <td>0.232713</td>\n", - " <td>1.067806</td>\n", - " </tr>\n", - " <tr>\n", - " <th>235</th>\n", - " <td>GSM1070009</td>\n", - " <td>0.21218</td>\n", - " <td>-0.284657</td>\n", - " <td>-0.32472</td>\n", - " <td>-4.800142</td>\n", - " <td>1.0062</td>\n", - " <td>-0.141699</td>\n", - " <td>0.80704</td>\n", - " <td>-0.993146</td>\n", - " <td>-0.823621</td>\n", - " <td>...</td>\n", - " <td>-2.737709</td>\n", - " <td>-2.644713</td>\n", - " <td>-3.253632</td>\n", - " <td>6.505956</td>\n", - " <td>6.548781</td>\n", - " <td>3.12575</td>\n", - " <td>-2.917537</td>\n", - " <td>4.838599</td>\n", - " <td>0.863574</td>\n", - " <td>1.203499</td>\n", - " </tr>\n", - " <tr>\n", - " <th>236</th>\n", - " <td>GSM1070010</td>\n", - " <td>0.330997</td>\n", - " <td>-0.19446</td>\n", - " <td>-0.206405</td>\n", - " <td>-4.840442</td>\n", - " <td>1.521159</td>\n", - " <td>-0.424901</td>\n", - " <td>0.886358</td>\n", - " <td>-0.031455</td>\n", - " <td>-1.584939</td>\n", - " <td>...</td>\n", - " <td>-3.292034</td>\n", - " <td>-2.941633</td>\n", - " <td>-3.939222</td>\n", - " <td>6.790132</td>\n", - " <td>6.829164</td>\n", - " <td>3.365475</td>\n", - " <td>-2.736411</td>\n", - " <td>5.185601</td>\n", - " <td>0.846454</td>\n", - " <td>1.604729</td>\n", - " </tr>\n", - " <tr>\n", - " <th>237</th>\n", - " <td>GSM1070011</td>\n", - " <td>0.474815</td>\n", - " <td>0.043697</td>\n", - " <td>-0.102511</td>\n", - " <td>-4.849285</td>\n", - " <td>1.239637</td>\n", - " <td>-0.704124</td>\n", - " <td>0.698355</td>\n", - " <td>-0.414715</td>\n", - " <td>-1.721427</td>\n", - " <td>...</td>\n", - " <td>-3.378909</td>\n", - " <td>-2.909732</td>\n", - " <td>-3.510667</td>\n", - " <td>6.80237</td>\n", - " <td>6.784016</td>\n", - " <td>3.514036</td>\n", - " <td>-2.931018</td>\n", - " <td>4.798139</td>\n", - " <td>2.08952</td>\n", - " <td>1.597958</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>238 rows × 231 columns</p>\n", - "</div>" - ], - "text/plain": [ - " index dmr_3 dmr_31a dmr_6 ebv-miR-BART13 hsa-let-7c \\\n", - "0 GSM1069774 0.732675 -0.242559 0.577801 -4.469532 1.195899 \n", - "1 GSM1069775 0.249772 -0.655514 0.104933 -5.209572 0.498366 \n", - "2 GSM1069776 0.400779 -0.597444 0.232702 -4.952808 1.081166 \n", - "3 GSM1069777 0.380263 -0.900491 0.243207 -4.892073 -0.023958 \n", - "4 GSM1069778 0.422207 -0.414831 -0.000781 -5.139127 1.077485 \n", - ".. ... ... ... ... ... ... \n", - "233 GSM1070007 0.98797 -0.118186 0.750199 -4.572984 0.696251 \n", - "234 GSM1070008 -0.194781 -0.710519 -0.700226 -5.651293 0.742722 \n", - "235 GSM1070009 0.21218 -0.284657 -0.32472 -4.800142 1.0062 \n", - "236 GSM1070010 0.330997 -0.19446 -0.206405 -4.840442 1.521159 \n", - "237 GSM1070011 0.474815 0.043697 -0.102511 -4.849285 1.239637 \n", - "\n", - " hsa-let-7d-5p hsa-let-7i-5p hsa-miR-100-5p hsa-miR-101-3p ... \\\n", - "0 -0.334742 0.89199 -2.089223 -2.757097 ... \n", - "1 -0.194772 0.637863 -2.357572 -2.196884 ... \n", - "2 0.249982 1.45018 -1.138559 -1.802774 ... \n", - "3 -0.980435 1.071857 -2.077406 -2.11406 ... \n", - "4 -0.684875 0.724751 -0.689096 -1.182558 ... \n", - ".. ... ... ... ... ... \n", - "233 -1.089669 0.826 -1.604393 -2.87334 ... \n", - "234 -0.964527 0.570816 -1.046029 -1.840615 ... \n", - "235 -0.141699 0.80704 -0.993146 -0.823621 ... \n", - "236 -0.424901 0.886358 -0.031455 -1.584939 ... \n", - "237 -0.704124 0.698355 -0.414715 -1.721427 ... \n", - "\n", - " hsv2-miR-H24 hsv2-miR-H25 hsv2-miR-H6 hur_1 hur_2 hur_4 \\\n", - "0 -3.956004 -3.936689 -4.099346 6.98856 7.041557 3.822267 \n", - "1 -4.334103 -4.561624 -4.719714 6.774479 6.862654 3.529789 \n", - "2 -4.550077 -4.40729 -4.621278 6.808404 6.75867 3.496675 \n", - "3 -4.018911 -4.203106 -3.938707 6.524773 6.497959 3.541502 \n", - "4 -3.690971 -4.332452 -4.178727 6.562608 6.529399 3.305132 \n", - ".. ... ... ... ... ... ... \n", - "233 -2.163581 -2.15805 -2.302647 7.093144 7.150126 3.899704 \n", - "234 -4.507365 -4.23831 -4.63219 6.18658 6.232722 2.788619 \n", - "235 -2.737709 -2.644713 -3.253632 6.505956 6.548781 3.12575 \n", - "236 -3.292034 -2.941633 -3.939222 6.790132 6.829164 3.365475 \n", - "237 -3.378909 -2.909732 -3.510667 6.80237 6.784016 3.514036 \n", - "\n", - " hur_5 hur_6 miRNABrightCorner30 mr_1 \n", - "0 -2.268209 5.114399 2.017444 1.640437 \n", - "1 -2.656642 4.327117 2.022346 0.79426 \n", - "2 -2.676555 4.616284 1.498011 1.584544 \n", - "3 -3.073553 4.581648 0.789822 1.255367 \n", - "4 -2.964948 4.487481 1.219583 0.951615 \n", - ".. ... ... ... ... \n", - "233 -2.954284 5.505105 2.457963 2.142301 \n", - "234 -3.103706 4.340513 0.232713 1.067806 \n", - "235 -2.917537 4.838599 0.863574 1.203499 \n", - "236 -2.736411 5.185601 0.846454 1.604729 \n", - "237 -2.931018 4.798139 2.08952 1.597958 \n", - "\n", - "[238 rows x 231 columns]" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, { "cell_type": "code", "execution_count": 44,