From 0bf846a08de1dae8caee3749129a74c3e69a6d7e Mon Sep 17 00:00:00 2001 From: aakan96 <aakan96@mi.fu-berlin.de> Date: Fri, 14 Jul 2023 20:47:57 +0000 Subject: [PATCH] Update file DS_mRNA_limma_dataset_xgb_final-F.ipynb --- .../DS_mRNA_limma_dataset_xgb_final-F.ipynb | 1142 ----------------- 1 file changed, 1142 deletions(-) diff --git a/Machine Learning/DS_mRNA_limma_dataset_xgb_final-F.ipynb b/Machine Learning/DS_mRNA_limma_dataset_xgb_final-F.ipynb index c94fc84..a8bebcc 100644 --- a/Machine Learning/DS_mRNA_limma_dataset_xgb_final-F.ipynb +++ b/Machine Learning/DS_mRNA_limma_dataset_xgb_final-F.ipynb @@ -78,391 +78,6 @@ "#df_test = df_test[:-1]" ] }, - { - "cell_type": "code", - "execution_count": 6, - "id": "c7c9cdfc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>0</th>\n", - " <th>1</th>\n", - " <th>2</th>\n", - " <th>3</th>\n", - " <th>4</th>\n", - " <th>5</th>\n", - " <th>6</th>\n", - " <th>7</th>\n", - " <th>8</th>\n", - " <th>9</th>\n", - " <th>...</th>\n", - " <th>568</th>\n", - " <th>569</th>\n", - " <th>570</th>\n", - " <th>571</th>\n", - " <th>572</th>\n", - " <th>573</th>\n", - " <th>574</th>\n", - " <th>575</th>\n", - " <th>576</th>\n", - " <th>577</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>Gene_symbol</th>\n", - " <td>ABAT</td>\n", - " <td>ABHD5</td>\n", - " <td>ABLIM1</td>\n", - " <td>ABLIM3</td>\n", - " <td>ACAA1</td>\n", - " <td>ACADM</td>\n", - " <td>ACADVL</td>\n", - " <td>ACD</td>\n", - " <td>ACLY</td>\n", - " <td>ACOT11</td>\n", - " <td>...</td>\n", - " <td>XYLT1</td>\n", - " <td>YOD1</td>\n", - " <td>YTHDC1</td>\n", - " <td>ZBTB16</td>\n", - " <td>ZDHHC13</td>\n", - " <td>ZFP64</td>\n", - " <td>ZNF185</td>\n", - " <td>ZNF365</td>\n", - " <td>ZNF426</td>\n", - " <td>ZNF710</td>\n", - " </tr>\n", - " <tr>\n", - " <th>GSM1727130</th>\n", - " <td>186</td>\n", - " <td>2603</td>\n", - " <td>42653</td>\n", - " <td>220</td>\n", - " <td>2132</td>\n", - " <td>22869</td>\n", - " <td>19775</td>\n", - " <td>4486</td>\n", - " <td>8835</td>\n", - " <td>2332</td>\n", - " <td>...</td>\n", - " <td>392</td>\n", - " <td>222</td>\n", - " <td>295</td>\n", - " <td>4598</td>\n", - " <td>7009</td>\n", - " <td>568</td>\n", - " <td>65123</td>\n", - " <td>56</td>\n", - " <td>308</td>\n", - " <td>10385</td>\n", - " </tr>\n", - " <tr>\n", - " <th>GSM1727131</th>\n", - " <td>93</td>\n", - " <td>1137</td>\n", - " <td>16493</td>\n", - " <td>69</td>\n", - " <td>1816</td>\n", - " <td>17788</td>\n", - " <td>16870</td>\n", - " <td>7993</td>\n", - " <td>21434</td>\n", - " <td>2211</td>\n", - " <td>...</td>\n", - " <td>62</td>\n", - " <td>78</td>\n", - " <td>144</td>\n", - " <td>2132</td>\n", - " <td>2602</td>\n", - " <td>1720</td>\n", - " <td>13531</td>\n", - " <td>47</td>\n", - " <td>140</td>\n", - " <td>6441</td>\n", - " </tr>\n", - " <tr>\n", - " <th>GSM1727132</th>\n", - " <td>198</td>\n", - " <td>5593</td>\n", - " <td>53918</td>\n", - " <td>263</td>\n", - " <td>3490</td>\n", - " <td>39276</td>\n", - " <td>25847</td>\n", - " <td>4413</td>\n", - " <td>9212</td>\n", - " <td>7419</td>\n", - " <td>...</td>\n", - " <td>481</td>\n", - " <td>355</td>\n", - " <td>308</td>\n", - " <td>1071</td>\n", - " <td>10289</td>\n", - " <td>379</td>\n", - " <td>65131</td>\n", - " <td>206</td>\n", - " <td>1251</td>\n", - " <td>11768</td>\n", - " </tr>\n", - " <tr>\n", - " <th>GSM1727133</th>\n", - " <td>104</td>\n", - " <td>1636</td>\n", - " <td>19203</td>\n", - " <td>127</td>\n", - " <td>1518</td>\n", - " <td>17951</td>\n", - " <td>16854</td>\n", - " <td>12800</td>\n", - " <td>11939</td>\n", - " <td>5136</td>\n", - " <td>...</td>\n", - " <td>213</td>\n", - " <td>122</td>\n", - " <td>244</td>\n", - " <td>482</td>\n", - " <td>3578</td>\n", - " <td>1990</td>\n", - " <td>37715</td>\n", - " <td>66</td>\n", - " <td>361</td>\n", - " <td>8517</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>GSM573947</th>\n", - " <td>4.273622</td>\n", - " <td>5.246957</td>\n", - " <td>9.597787</td>\n", - " <td>6.158036</td>\n", - " <td>7.843278</td>\n", - " <td>7.540486</td>\n", - " <td>10.125865</td>\n", - " <td>8.390029</td>\n", - " <td>7.260406</td>\n", - " <td>7.029879</td>\n", - " <td>...</td>\n", - " <td>7.411724</td>\n", - " <td>4.940705</td>\n", - " <td>9.863287</td>\n", - " <td>6.235713</td>\n", - " <td>7.042848</td>\n", - " <td>7.675928</td>\n", - " <td>7.964469</td>\n", - " <td>6.295932</td>\n", - " <td>5.095579</td>\n", - " <td>8.884501</td>\n", - " </tr>\n", - " <tr>\n", - " <th>GSM573948</th>\n", - " <td>5.587715</td>\n", - " <td>5.0088</td>\n", - " <td>8.983841</td>\n", - " <td>8.052282</td>\n", - " <td>8.080951</td>\n", - " <td>8.557505</td>\n", - " <td>9.603772</td>\n", - " <td>8.493015</td>\n", - " <td>6.80822</td>\n", - " <td>7.442001</td>\n", - " <td>...</td>\n", - " <td>7.809331</td>\n", - " <td>4.748696</td>\n", - " <td>9.966924</td>\n", - " <td>6.370717</td>\n", - " <td>6.694095</td>\n", - " <td>7.215412</td>\n", - " <td>9.596515</td>\n", - " <td>6.052377</td>\n", - " <td>5.453787</td>\n", - " <td>8.364667</td>\n", - " </tr>\n", - " <tr>\n", - " <th>GSM573949</th>\n", - " <td>4.448848</td>\n", - " <td>5.210555</td>\n", - " <td>9.464238</td>\n", - " <td>6.475869</td>\n", - " <td>7.987815</td>\n", - " <td>8.141677</td>\n", - " <td>9.614827</td>\n", - " <td>8.336654</td>\n", - " <td>7.629702</td>\n", - " <td>7.163679</td>\n", - " <td>...</td>\n", - " <td>7.389107</td>\n", - " <td>5.044658</td>\n", - " <td>9.909041</td>\n", - " <td>6.399272</td>\n", - " <td>6.468483</td>\n", - " <td>7.134219</td>\n", - " <td>8.123325</td>\n", - " <td>6.01643</td>\n", - " <td>5.06643</td>\n", - " <td>8.760867</td>\n", - " </tr>\n", - " <tr>\n", - " <th>GSM573950</th>\n", - " <td>5.032263</td>\n", - " <td>4.849188</td>\n", - " <td>9.468484</td>\n", - " <td>7.966742</td>\n", - " <td>8.143446</td>\n", - " <td>8.049146</td>\n", - " <td>9.544212</td>\n", - " <td>8.524367</td>\n", - " <td>6.777896</td>\n", - " <td>7.389599</td>\n", - " <td>...</td>\n", - " <td>7.237896</td>\n", - " <td>4.971315</td>\n", - " <td>10.181616</td>\n", - " <td>6.812211</td>\n", - " <td>8.065029</td>\n", - " <td>7.25272</td>\n", - " <td>8.542159</td>\n", - " <td>6.113286</td>\n", - " <td>5.045287</td>\n", - " <td>8.657596</td>\n", - " </tr>\n", - " <tr>\n", - " <th>GSM573951</th>\n", - " <td>4.461845</td>\n", - " <td>4.846861</td>\n", - " <td>7.099264</td>\n", - " <td>6.269944</td>\n", - " <td>8.190815</td>\n", - " <td>8.865402</td>\n", - " <td>10.212028</td>\n", - " <td>8.61563</td>\n", - " <td>7.266994</td>\n", - " <td>7.081327</td>\n", - " <td>...</td>\n", - " <td>6.941117</td>\n", - " <td>5.060393</td>\n", - " <td>10.228238</td>\n", - " <td>6.01229</td>\n", - " <td>6.759003</td>\n", - " <td>7.871117</td>\n", - " <td>6.893819</td>\n", - " <td>5.94915</td>\n", - " <td>5.135424</td>\n", - " <td>8.292854</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>220 rows × 578 columns</p>\n", - "</div>" - ], - "text/plain": [ - " 0 1 2 3 4 5 \\\n", - "Gene_symbol ABAT ABHD5 ABLIM1 ABLIM3 ACAA1 ACADM \n", - "GSM1727130 186 2603 42653 220 2132 22869 \n", - "GSM1727131 93 1137 16493 69 1816 17788 \n", - "GSM1727132 198 5593 53918 263 3490 39276 \n", - "GSM1727133 104 1636 19203 127 1518 17951 \n", - "... ... ... ... ... ... ... \n", - "GSM573947 4.273622 5.246957 9.597787 6.158036 7.843278 7.540486 \n", - "GSM573948 5.587715 5.0088 8.983841 8.052282 8.080951 8.557505 \n", - "GSM573949 4.448848 5.210555 9.464238 6.475869 7.987815 8.141677 \n", - "GSM573950 5.032263 4.849188 9.468484 7.966742 8.143446 8.049146 \n", - "GSM573951 4.461845 4.846861 7.099264 6.269944 8.190815 8.865402 \n", - "\n", - " 6 7 8 9 ... 568 569 \\\n", - "Gene_symbol ACADVL ACD ACLY ACOT11 ... XYLT1 YOD1 \n", - "GSM1727130 19775 4486 8835 2332 ... 392 222 \n", - "GSM1727131 16870 7993 21434 2211 ... 62 78 \n", - "GSM1727132 25847 4413 9212 7419 ... 481 355 \n", - "GSM1727133 16854 12800 11939 5136 ... 213 122 \n", - "... ... ... ... ... ... ... ... \n", - "GSM573947 10.125865 8.390029 7.260406 7.029879 ... 7.411724 4.940705 \n", - "GSM573948 9.603772 8.493015 6.80822 7.442001 ... 7.809331 4.748696 \n", - "GSM573949 9.614827 8.336654 7.629702 7.163679 ... 7.389107 5.044658 \n", - "GSM573950 9.544212 8.524367 6.777896 7.389599 ... 7.237896 4.971315 \n", - "GSM573951 10.212028 8.61563 7.266994 7.081327 ... 6.941117 5.060393 \n", - "\n", - " 570 571 572 573 574 575 \\\n", - "Gene_symbol YTHDC1 ZBTB16 ZDHHC13 ZFP64 ZNF185 ZNF365 \n", - "GSM1727130 295 4598 7009 568 65123 56 \n", - "GSM1727131 144 2132 2602 1720 13531 47 \n", - "GSM1727132 308 1071 10289 379 65131 206 \n", - "GSM1727133 244 482 3578 1990 37715 66 \n", - "... ... ... ... ... ... ... \n", - "GSM573947 9.863287 6.235713 7.042848 7.675928 7.964469 6.295932 \n", - "GSM573948 9.966924 6.370717 6.694095 7.215412 9.596515 6.052377 \n", - "GSM573949 9.909041 6.399272 6.468483 7.134219 8.123325 6.01643 \n", - "GSM573950 10.181616 6.812211 8.065029 7.25272 8.542159 6.113286 \n", - "GSM573951 10.228238 6.01229 6.759003 7.871117 6.893819 5.94915 \n", - "\n", - " 576 577 \n", - "Gene_symbol ZNF426 ZNF710 \n", - "GSM1727130 308 10385 \n", - "GSM1727131 140 6441 \n", - "GSM1727132 1251 11768 \n", - "GSM1727133 361 8517 \n", - "... ... ... \n", - "GSM573947 5.095579 8.884501 \n", - "GSM573948 5.453787 8.364667 \n", - "GSM573949 5.06643 8.760867 \n", - "GSM573950 5.045287 8.657596 \n", - "GSM573951 5.135424 8.292854 \n", - "\n", - "[220 rows x 578 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_train" - ] - }, { "cell_type": "code", "execution_count": 7, @@ -722,391 +337,6 @@ "df_train= df_train.apply(pd.to_numeric)" ] }, - { - "cell_type": "code", - "execution_count": 28, - "id": "2e4ab71d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>ABAT</th>\n", - " <th>ABHD5</th>\n", - " <th>ABLIM1</th>\n", - " <th>ABLIM3</th>\n", - " <th>ACAA1</th>\n", - " <th>ACADM</th>\n", - " <th>ACADVL</th>\n", - " <th>ACD</th>\n", - " <th>ACLY</th>\n", - " <th>ACOT11</th>\n", - " <th>...</th>\n", - " <th>YOD1</th>\n", - " <th>YTHDC1</th>\n", - " <th>ZBTB16</th>\n", - " <th>ZDHHC13</th>\n", - " <th>ZFP64</th>\n", - " <th>ZNF185</th>\n", - " <th>ZNF365</th>\n", - " <th>ZNF426</th>\n", - " <th>ZNF710</th>\n", - " <th>index</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>186.000000</td>\n", - " <td>2603.000000</td>\n", - " <td>42653.000000</td>\n", - " <td>220.000000</td>\n", - " <td>2132.000000</td>\n", - " <td>22869.000000</td>\n", - " <td>19775.000000</td>\n", - " <td>4486.000000</td>\n", - " <td>8835.000000</td>\n", - " <td>2332.000000</td>\n", - " <td>...</td>\n", - " <td>222.000000</td>\n", - " <td>295.000000</td>\n", - " <td>4598.000000</td>\n", - " <td>7009.000000</td>\n", - " <td>568.000000</td>\n", - " <td>65123.000000</td>\n", - " <td>56.000000</td>\n", - " <td>308.000000</td>\n", - " <td>10385.000000</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>93.000000</td>\n", - " <td>1137.000000</td>\n", - " <td>16493.000000</td>\n", - " <td>69.000000</td>\n", - " <td>1816.000000</td>\n", - " <td>17788.000000</td>\n", - " <td>16870.000000</td>\n", - " <td>7993.000000</td>\n", - " <td>21434.000000</td>\n", - " <td>2211.000000</td>\n", - " <td>...</td>\n", - " <td>78.000000</td>\n", - " <td>144.000000</td>\n", - " <td>2132.000000</td>\n", - " <td>2602.000000</td>\n", - " <td>1720.000000</td>\n", - " <td>13531.000000</td>\n", - " <td>47.000000</td>\n", - " <td>140.000000</td>\n", - " <td>6441.000000</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>198.000000</td>\n", - " <td>5593.000000</td>\n", - " <td>53918.000000</td>\n", - " <td>263.000000</td>\n", - " <td>3490.000000</td>\n", - " <td>39276.000000</td>\n", - " <td>25847.000000</td>\n", - " <td>4413.000000</td>\n", - " <td>9212.000000</td>\n", - " <td>7419.000000</td>\n", - " <td>...</td>\n", - " <td>355.000000</td>\n", - " <td>308.000000</td>\n", - " <td>1071.000000</td>\n", - " <td>10289.000000</td>\n", - " <td>379.000000</td>\n", - " <td>65131.000000</td>\n", - " <td>206.000000</td>\n", - " <td>1251.000000</td>\n", - " <td>11768.000000</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>104.000000</td>\n", - " <td>1636.000000</td>\n", - " <td>19203.000000</td>\n", - " <td>127.000000</td>\n", - " <td>1518.000000</td>\n", - " <td>17951.000000</td>\n", - " <td>16854.000000</td>\n", - " <td>12800.000000</td>\n", - " <td>11939.000000</td>\n", - " <td>5136.000000</td>\n", - " <td>...</td>\n", - " <td>122.000000</td>\n", - " <td>244.000000</td>\n", - " <td>482.000000</td>\n", - " <td>3578.000000</td>\n", - " <td>1990.000000</td>\n", - " <td>37715.000000</td>\n", - " <td>66.000000</td>\n", - " <td>361.000000</td>\n", - " <td>8517.000000</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>205.000000</td>\n", - " <td>4720.000000</td>\n", - " <td>56984.000000</td>\n", - " <td>495.000000</td>\n", - " <td>3309.000000</td>\n", - " <td>24427.000000</td>\n", - " <td>28197.000000</td>\n", - " <td>5718.000000</td>\n", - " <td>8192.000000</td>\n", - " <td>6748.000000</td>\n", - " <td>...</td>\n", - " <td>275.000000</td>\n", - " <td>200.000000</td>\n", - " <td>3632.000000</td>\n", - " <td>7275.000000</td>\n", - " <td>509.000000</td>\n", - " <td>65138.000000</td>\n", - " <td>188.000000</td>\n", - " <td>587.000000</td>\n", - " <td>9390.000000</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>214</th>\n", - " <td>4.273622</td>\n", - " <td>5.246957</td>\n", - " <td>9.597787</td>\n", - " <td>6.158036</td>\n", - " <td>7.843278</td>\n", - " <td>7.540486</td>\n", - " <td>10.125865</td>\n", - " <td>8.390029</td>\n", - " <td>7.260406</td>\n", - " <td>7.029879</td>\n", - " <td>...</td>\n", - " <td>4.940705</td>\n", - " <td>9.863287</td>\n", - " <td>6.235713</td>\n", - " <td>7.042848</td>\n", - " <td>7.675928</td>\n", - " <td>7.964469</td>\n", - " <td>6.295932</td>\n", - " <td>5.095579</td>\n", - " <td>8.884501</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>215</th>\n", - " <td>5.587715</td>\n", - " <td>5.008800</td>\n", - " <td>8.983841</td>\n", - " <td>8.052282</td>\n", - " <td>8.080951</td>\n", - " <td>8.557505</td>\n", - " <td>9.603772</td>\n", - " <td>8.493015</td>\n", - " <td>6.808220</td>\n", - " <td>7.442001</td>\n", - " <td>...</td>\n", - " <td>4.748696</td>\n", - " <td>9.966924</td>\n", - " <td>6.370717</td>\n", - " <td>6.694095</td>\n", - " <td>7.215412</td>\n", - " <td>9.596515</td>\n", - " <td>6.052377</td>\n", - " <td>5.453787</td>\n", - " <td>8.364667</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>216</th>\n", - " <td>4.448848</td>\n", - " <td>5.210555</td>\n", - " <td>9.464238</td>\n", - " <td>6.475869</td>\n", - " <td>7.987815</td>\n", - " <td>8.141677</td>\n", - " <td>9.614827</td>\n", - " <td>8.336654</td>\n", - " <td>7.629702</td>\n", - " <td>7.163679</td>\n", - " <td>...</td>\n", - " <td>5.044658</td>\n", - " <td>9.909041</td>\n", - " <td>6.399272</td>\n", - " <td>6.468483</td>\n", - " <td>7.134219</td>\n", - " <td>8.123325</td>\n", - " <td>6.016430</td>\n", - " <td>5.066430</td>\n", - " <td>8.760867</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>217</th>\n", - " <td>5.032263</td>\n", - " <td>4.849188</td>\n", - " <td>9.468484</td>\n", - " <td>7.966742</td>\n", - " <td>8.143446</td>\n", - " <td>8.049146</td>\n", - " <td>9.544212</td>\n", - " <td>8.524367</td>\n", - " <td>6.777896</td>\n", - " <td>7.389599</td>\n", - " <td>...</td>\n", - " <td>4.971315</td>\n", - " <td>10.181616</td>\n", - " <td>6.812211</td>\n", - " <td>8.065029</td>\n", - " <td>7.252720</td>\n", - " <td>8.542159</td>\n", - " <td>6.113286</td>\n", - " <td>5.045287</td>\n", - " <td>8.657596</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>218</th>\n", - " <td>4.461845</td>\n", - " <td>4.846861</td>\n", - " <td>7.099264</td>\n", - " <td>6.269944</td>\n", - " <td>8.190815</td>\n", - " <td>8.865402</td>\n", - " <td>10.212028</td>\n", - " <td>8.615630</td>\n", - " <td>7.266994</td>\n", - " <td>7.081327</td>\n", - " <td>...</td>\n", - " <td>5.060393</td>\n", - " <td>10.228238</td>\n", - " <td>6.012290</td>\n", - " <td>6.759003</td>\n", - " <td>7.871117</td>\n", - " <td>6.893819</td>\n", - " <td>5.949150</td>\n", - " <td>5.135424</td>\n", - " <td>8.292854</td>\n", - " <td>1</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>219 rows × 579 columns</p>\n", - "</div>" - ], - "text/plain": [ - " ABAT ABHD5 ABLIM1 ABLIM3 ACAA1 \\\n", - "0 186.000000 2603.000000 42653.000000 220.000000 2132.000000 \n", - "1 93.000000 1137.000000 16493.000000 69.000000 1816.000000 \n", - "2 198.000000 5593.000000 53918.000000 263.000000 3490.000000 \n", - "3 104.000000 1636.000000 19203.000000 127.000000 1518.000000 \n", - "4 205.000000 4720.000000 56984.000000 495.000000 3309.000000 \n", - ".. ... ... ... ... ... \n", - "214 4.273622 5.246957 9.597787 6.158036 7.843278 \n", - "215 5.587715 5.008800 8.983841 8.052282 8.080951 \n", - "216 4.448848 5.210555 9.464238 6.475869 7.987815 \n", - "217 5.032263 4.849188 9.468484 7.966742 8.143446 \n", - "218 4.461845 4.846861 7.099264 6.269944 8.190815 \n", - "\n", - " ACADM ACADVL ACD ACLY ACOT11 ... \\\n", - "0 22869.000000 19775.000000 4486.000000 8835.000000 2332.000000 ... \n", - "1 17788.000000 16870.000000 7993.000000 21434.000000 2211.000000 ... \n", - "2 39276.000000 25847.000000 4413.000000 9212.000000 7419.000000 ... \n", - "3 17951.000000 16854.000000 12800.000000 11939.000000 5136.000000 ... \n", - "4 24427.000000 28197.000000 5718.000000 8192.000000 6748.000000 ... \n", - ".. ... ... ... ... ... ... \n", - "214 7.540486 10.125865 8.390029 7.260406 7.029879 ... \n", - "215 8.557505 9.603772 8.493015 6.808220 7.442001 ... \n", - "216 8.141677 9.614827 8.336654 7.629702 7.163679 ... \n", - "217 8.049146 9.544212 8.524367 6.777896 7.389599 ... \n", - "218 8.865402 10.212028 8.615630 7.266994 7.081327 ... \n", - "\n", - " YOD1 YTHDC1 ZBTB16 ZDHHC13 ZFP64 \\\n", - "0 222.000000 295.000000 4598.000000 7009.000000 568.000000 \n", - "1 78.000000 144.000000 2132.000000 2602.000000 1720.000000 \n", - "2 355.000000 308.000000 1071.000000 10289.000000 379.000000 \n", - "3 122.000000 244.000000 482.000000 3578.000000 1990.000000 \n", - "4 275.000000 200.000000 3632.000000 7275.000000 509.000000 \n", - ".. ... ... ... ... ... \n", - "214 4.940705 9.863287 6.235713 7.042848 7.675928 \n", - "215 4.748696 9.966924 6.370717 6.694095 7.215412 \n", - "216 5.044658 9.909041 6.399272 6.468483 7.134219 \n", - "217 4.971315 10.181616 6.812211 8.065029 7.252720 \n", - "218 5.060393 10.228238 6.012290 6.759003 7.871117 \n", - "\n", - " ZNF185 ZNF365 ZNF426 ZNF710 index \n", - "0 65123.000000 56.000000 308.000000 10385.000000 0 \n", - "1 13531.000000 47.000000 140.000000 6441.000000 1 \n", - "2 65131.000000 206.000000 1251.000000 11768.000000 0 \n", - "3 37715.000000 66.000000 361.000000 8517.000000 1 \n", - "4 65138.000000 188.000000 587.000000 9390.000000 0 \n", - ".. ... ... ... ... ... \n", - "214 7.964469 6.295932 5.095579 8.884501 1 \n", - "215 9.596515 6.052377 5.453787 8.364667 1 \n", - "216 8.123325 6.016430 5.066430 8.760867 1 \n", - "217 8.542159 6.113286 5.045287 8.657596 1 \n", - "218 6.893819 5.949150 5.135424 8.292854 1 \n", - "\n", - "[219 rows x 579 columns]" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_train" - ] - }, { "cell_type": "code", "execution_count": 29, @@ -1211,378 +441,6 @@ "# Feature Selection" ] }, - { - "cell_type": "code", - "execution_count": 34, - "id": "f0f1977f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>ABAT</th>\n", - " <th>ABHD5</th>\n", - " <th>ABLIM1</th>\n", - " <th>ABLIM3</th>\n", - " <th>ACAA1</th>\n", - " <th>ACADM</th>\n", - " <th>ACADVL</th>\n", - " <th>ACD</th>\n", - " <th>ACLY</th>\n", - " <th>ACOT11</th>\n", - " <th>...</th>\n", - " <th>XYLT1</th>\n", - " <th>YOD1</th>\n", - " <th>YTHDC1</th>\n", - " <th>ZBTB16</th>\n", - " <th>ZDHHC13</th>\n", - " <th>ZFP64</th>\n", - " <th>ZNF185</th>\n", - " <th>ZNF365</th>\n", - " <th>ZNF426</th>\n", - " <th>ZNF710</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>186</td>\n", - " <td>2603</td>\n", - " <td>42653</td>\n", - " <td>220</td>\n", - " <td>2132</td>\n", - " <td>22869</td>\n", - " <td>19775</td>\n", - " <td>4486</td>\n", - " <td>8835</td>\n", - " <td>2332</td>\n", - " <td>...</td>\n", - " <td>392</td>\n", - " <td>222</td>\n", - " <td>295</td>\n", - " <td>4598</td>\n", - " <td>7009</td>\n", - " <td>568</td>\n", - " <td>65123</td>\n", - " <td>56</td>\n", - " <td>308</td>\n", - " <td>10385</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>93</td>\n", - " <td>1137</td>\n", - " <td>16493</td>\n", - " <td>69</td>\n", - " <td>1816</td>\n", - " <td>17788</td>\n", - " <td>16870</td>\n", - " <td>7993</td>\n", - " <td>21434</td>\n", - " <td>2211</td>\n", - " <td>...</td>\n", - " <td>62</td>\n", - " <td>78</td>\n", - " <td>144</td>\n", - " <td>2132</td>\n", - " <td>2602</td>\n", - " <td>1720</td>\n", - " <td>13531</td>\n", - " <td>47</td>\n", - " <td>140</td>\n", - " <td>6441</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>198</td>\n", - " <td>5593</td>\n", - " <td>53918</td>\n", - " <td>263</td>\n", - " <td>3490</td>\n", - " <td>39276</td>\n", - " <td>25847</td>\n", - " <td>4413</td>\n", - " <td>9212</td>\n", - " <td>7419</td>\n", - " <td>...</td>\n", - " <td>481</td>\n", - " <td>355</td>\n", - " <td>308</td>\n", - " <td>1071</td>\n", - " <td>10289</td>\n", - " <td>379</td>\n", - " <td>65131</td>\n", - " <td>206</td>\n", - " <td>1251</td>\n", - " <td>11768</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>104</td>\n", - " <td>1636</td>\n", - " <td>19203</td>\n", - " <td>127</td>\n", - " <td>1518</td>\n", - " <td>17951</td>\n", - " <td>16854</td>\n", - " <td>12800</td>\n", - " <td>11939</td>\n", - " <td>5136</td>\n", - " <td>...</td>\n", - " <td>213</td>\n", - " <td>122</td>\n", - " <td>244</td>\n", - " <td>482</td>\n", - " <td>3578</td>\n", - " <td>1990</td>\n", - " <td>37715</td>\n", - " <td>66</td>\n", - " <td>361</td>\n", - " <td>8517</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>205</td>\n", - " <td>4720</td>\n", - " <td>56984</td>\n", - " <td>495</td>\n", - " <td>3309</td>\n", - " <td>24427</td>\n", - " <td>28197</td>\n", - " <td>5718</td>\n", - " <td>8192</td>\n", - " <td>6748</td>\n", - " <td>...</td>\n", - " <td>169</td>\n", - " <td>275</td>\n", - " <td>200</td>\n", - " <td>3632</td>\n", - " <td>7275</td>\n", - " <td>509</td>\n", - " <td>65138</td>\n", - " <td>188</td>\n", - " <td>587</td>\n", - " <td>9390</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>214</th>\n", - " <td>4</td>\n", - " <td>5</td>\n", - " <td>9</td>\n", - " <td>6</td>\n", - " <td>7</td>\n", - " <td>7</td>\n", - " <td>10</td>\n", - " <td>8</td>\n", - " <td>7</td>\n", - " <td>7</td>\n", - " <td>...</td>\n", - " <td>7</td>\n", - " <td>4</td>\n", - " <td>9</td>\n", - " <td>6</td>\n", - " <td>7</td>\n", - " <td>7</td>\n", - " <td>7</td>\n", - " <td>6</td>\n", - " <td>5</td>\n", - " <td>8</td>\n", - " </tr>\n", - " <tr>\n", - " <th>215</th>\n", - " <td>5</td>\n", - " <td>5</td>\n", - " <td>8</td>\n", - " <td>8</td>\n", - " <td>8</td>\n", - " <td>8</td>\n", - " <td>9</td>\n", - " <td>8</td>\n", - " <td>6</td>\n", - " <td>7</td>\n", - " <td>...</td>\n", - " <td>7</td>\n", - " <td>4</td>\n", - " <td>9</td>\n", - " <td>6</td>\n", - " <td>6</td>\n", - " <td>7</td>\n", - " <td>9</td>\n", - " <td>6</td>\n", - " <td>5</td>\n", - " <td>8</td>\n", - " </tr>\n", - " <tr>\n", - " <th>216</th>\n", - " <td>4</td>\n", - " <td>5</td>\n", - " <td>9</td>\n", - " <td>6</td>\n", - " <td>7</td>\n", - " <td>8</td>\n", - " <td>9</td>\n", - " <td>8</td>\n", - " <td>7</td>\n", - " <td>7</td>\n", - " <td>...</td>\n", - " <td>7</td>\n", - " <td>5</td>\n", - " <td>9</td>\n", - " <td>6</td>\n", - " <td>6</td>\n", - " <td>7</td>\n", - " <td>8</td>\n", - " <td>6</td>\n", - " <td>5</td>\n", - " <td>8</td>\n", - " </tr>\n", - " <tr>\n", - " <th>217</th>\n", - " <td>5</td>\n", - " <td>4</td>\n", - " <td>9</td>\n", - " <td>7</td>\n", - " <td>8</td>\n", - " <td>8</td>\n", - " <td>9</td>\n", - " <td>8</td>\n", - " <td>6</td>\n", - " <td>7</td>\n", - " <td>...</td>\n", - " <td>7</td>\n", - " <td>4</td>\n", - " <td>10</td>\n", - " <td>6</td>\n", - " <td>8</td>\n", - " <td>7</td>\n", - " <td>8</td>\n", - " <td>6</td>\n", - " <td>5</td>\n", - " <td>8</td>\n", - " </tr>\n", - " <tr>\n", - " <th>218</th>\n", - " <td>4</td>\n", - " <td>4</td>\n", - " <td>7</td>\n", - " <td>6</td>\n", - " <td>8</td>\n", - " <td>8</td>\n", - " <td>10</td>\n", - " <td>8</td>\n", - " <td>7</td>\n", - " <td>7</td>\n", - " <td>...</td>\n", - " <td>6</td>\n", - " <td>5</td>\n", - " <td>10</td>\n", - " <td>6</td>\n", - " <td>6</td>\n", - " <td>7</td>\n", - " <td>6</td>\n", - " <td>5</td>\n", - " <td>5</td>\n", - " <td>8</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>219 rows × 578 columns</p>\n", - "</div>" - ], - "text/plain": [ - " ABAT ABHD5 ABLIM1 ABLIM3 ACAA1 ACADM ACADVL ACD ACLY ACOT11 \\\n", - "0 186 2603 42653 220 2132 22869 19775 4486 8835 2332 \n", - "1 93 1137 16493 69 1816 17788 16870 7993 21434 2211 \n", - "2 198 5593 53918 263 3490 39276 25847 4413 9212 7419 \n", - "3 104 1636 19203 127 1518 17951 16854 12800 11939 5136 \n", - "4 205 4720 56984 495 3309 24427 28197 5718 8192 6748 \n", - ".. ... ... ... ... ... ... ... ... ... ... \n", - "214 4 5 9 6 7 7 10 8 7 7 \n", - "215 5 5 8 8 8 8 9 8 6 7 \n", - "216 4 5 9 6 7 8 9 8 7 7 \n", - "217 5 4 9 7 8 8 9 8 6 7 \n", - "218 4 4 7 6 8 8 10 8 7 7 \n", - "\n", - " ... XYLT1 YOD1 YTHDC1 ZBTB16 ZDHHC13 ZFP64 ZNF185 ZNF365 ZNF426 \\\n", - "0 ... 392 222 295 4598 7009 568 65123 56 308 \n", - "1 ... 62 78 144 2132 2602 1720 13531 47 140 \n", - "2 ... 481 355 308 1071 10289 379 65131 206 1251 \n", - "3 ... 213 122 244 482 3578 1990 37715 66 361 \n", - "4 ... 169 275 200 3632 7275 509 65138 188 587 \n", - ".. ... ... ... ... ... ... ... ... ... ... \n", - "214 ... 7 4 9 6 7 7 7 6 5 \n", - "215 ... 7 4 9 6 6 7 9 6 5 \n", - "216 ... 7 5 9 6 6 7 8 6 5 \n", - "217 ... 7 4 10 6 8 7 8 6 5 \n", - "218 ... 6 5 10 6 6 7 6 5 5 \n", - "\n", - " ZNF710 \n", - "0 10385 \n", - "1 6441 \n", - "2 11768 \n", - "3 8517 \n", - "4 9390 \n", - ".. ... \n", - "214 8 \n", - "215 8 \n", - "216 8 \n", - "217 8 \n", - "218 8 \n", - "\n", - "[219 rows x 578 columns]" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X" - ] - }, { "cell_type": "code", "execution_count": 35, -- GitLab