From 25d6b9be33a2bb450aec37fdf74242d08f0cecfd Mon Sep 17 00:00:00 2001
From: james94 <james94@mi.fu-berlin.de>
Date: Fri, 5 Aug 2022 14:45:16 +0200
Subject: [PATCH] rules/build_hifi.smk

update hifi build
---
 scripts/addFullTableForReport.py              |  23 +--
 scripts/colouredHeatmap_legend.tsv            |   5 +
 scripts/fullTable_heatmap_external.R          | 132 +++++++++++++++++
 .../fullTable_heatmap_internalComparison.R    | 140 ++++++++++++++++++
 scripts/internalComparison_legend.tsv         |   3 +
 scripts/tableOnSamePage.css                   |   3 +
 6 files changed, 296 insertions(+), 10 deletions(-)
 create mode 100644 scripts/colouredHeatmap_legend.tsv
 create mode 100644 scripts/fullTable_heatmap_external.R
 create mode 100644 scripts/fullTable_heatmap_internalComparison.R
 create mode 100644 scripts/internalComparison_legend.tsv
 create mode 100644 scripts/tableOnSamePage.css

diff --git a/scripts/addFullTableForReport.py b/scripts/addFullTableForReport.py
index bb37771..6ac763b 100644
--- a/scripts/addFullTableForReport.py
+++ b/scripts/addFullTableForReport.py
@@ -2,8 +2,8 @@ import pandas as pd
 from tabulate import tabulate
 samples=pd.read_csv(snakemake.input.results, dtype=str, index_col=0, delim_whitespace=True, skip_blank_lines=True)
 # samples=samples.reset_index
-turn2FloatAndMb=['Bases_Mb','Est_Size_Mb','Longest_Scaff_Mb','Scaff_N50_Mb','Scaff_NG50_Mb','Scaff_N95_Mb','Scaff_NG95_Mb','Longest_Cont','Cont_N50_Mb','Cont_NG50_Mb','Cont_N95_Mb','Cont_NG95_Mb']
-roundDecimals=['Comp_BUSCOs_%','Comp_Single_BUSCOs_%','Het_%','GC_%','QV','Completeness','Bases_Mb','Est_Size_Mb','Longest_Scaff_Mb','Scaff_N50_Mb','Scaff_NG50_Mb','Scaff_N95_Mb','Scaff_NG95_Mb','Longest_Cont','Cont_N50_Mb','Cont_NG50_Mb','Cont_N95_Mb','Cont_NG95_Mb']
+turn2FloatAndMb=['Bases_Mb','Est_Size_Mb','Longest_Scaff_Mb','Scaff_N50_Mb','Scaff_NG50_Mb','Scaff_N95_Mb','Scaff_NG95_Mb','Longest_Cont_Mb','Cont_N50_Mb','Cont_NG50_Mb','Cont_N95_Mb','Cont_NG95_Mb']
+roundDecimals=['Comp_BUSCOs_%','Comp_Single_BUSCOs_%','Het_%','GC_%','QV','Completeness','Bases_Mb','Est_Size_Mb','Longest_Scaff_Mb','Scaff_N50_Mb','Scaff_NG50_Mb','Scaff_N95_Mb','Scaff_NG95_Mb','Longest_Cont_Mb','Cont_N50_Mb','Cont_NG50_Mb','Cont_N95_Mb','Cont_NG95_Mb']
 print('this is samples',samples)
 sampleTransposed=samples.T
 print('this is sampleTransposed',sampleTransposed)
@@ -13,11 +13,14 @@ for i in range(0,4):
 	sampleTransposed[roundDecimals[i]]=sampleTransposed[roundDecimals[i]].str.replace('%','')
 for roundHeader in roundDecimals:
 	sampleTransposed[roundHeader]=sampleTransposed[roundHeader].astype(float).round(2)
-with open(snakemake.output[0], "w") as out:
-	print("", file=out)
-	print("\\blandscape", file=out)
-	print("", file=out)
-	print("\\tiny", file=out)
-	print(tabulate(sampleTransposed.rename_axis('ASM_ID'), headers='keys',tablefmt="pipe", showindex=True), file=out)
-	print("\\elandscape", file=out)
-	print("", file=out)
+# The same ASM_ID-labelled table is written twice: as a markdown pipe table and as plain text.
+labelledTable=sampleTransposed.rename_axis('ASM_ID')
+with open(snakemake.output[0], "w") as out_markdown:
+	# The pipe table sits between the landscape/tiny macros; each "" entry prints an empty line.
+	pipeTable=tabulate(labelledTable, headers='keys',tablefmt="pipe", showindex=True)
+	for line in ("", "\\blandscape", "", "\\tiny", pipeTable, "\\elandscape", ""):
+		print(line, file=out_markdown)
+
+with open(snakemake.output[1], "w") as out_plain:
+	plainTable=tabulate(labelledTable, headers='keys',tablefmt="plain", showindex=True)
+	print(plainTable, file=out_plain)
diff --git a/scripts/colouredHeatmap_legend.tsv b/scripts/colouredHeatmap_legend.tsv
new file mode 100644
index 0000000..526c788
--- /dev/null
+++ b/scripts/colouredHeatmap_legend.tsv
@@ -0,0 +1,5 @@
+Gaps_per_Gb    		Scaff_NG50_Mb   			Cont_NG50_Mb  		QV    		Completeness  		Comp_Single_BUSCOs_%
+'< 200'			'> 100Mbp'				'> 10Mbp'		'> 50'		'> 95%'			'> 95%'
+'200 - 1000'		'10Mbp - 100Mbp'			'1Mbp - 10Mbp'		'40 - 50'	'90% - 95%'		'90% - 95%'
+'1000 - 10000'		'0.1Mbp - 10Mbp'			'0.01Mbp - 1Mbp'	'35 - 40'	'80% - 90%'		'80% - 90%'
+'> 10000'		'< 0.1Mbp'				'< 0.01Mbp'		'< 35'		'< 80%'			'< 80%'
diff --git a/scripts/fullTable_heatmap_external.R b/scripts/fullTable_heatmap_external.R
new file mode 100644
index 0000000..3d251c8
--- /dev/null
+++ b/scripts/fullTable_heatmap_external.R
@@ -0,0 +1,132 @@
+suppressMessages(library(dplyr))
+suppressMessages(library(formattable))
+
+# Writes two HTML tables to snakemake@output[[1]]:
+#   1. selected assembly statistics, each cell coloured by a fixed quality tier;
+#   2. a legend, read from snakemake@params[[1]], that explains the tiers.
+# snakemake@input[[1]] is the whitespace-delimited table of statistics.
+
+# Tier colours from worst to best.
+tier_red <- "#FF9999"
+tier_amber <- "#FFCC99"
+tier_green <- "#8fc773"
+tier_purple <- "#c699e8"
+worst_to_best <- c(tier_red, tier_amber, tier_green, tier_purple)
+# Used for Gaps_per_Gb instead of a tier when ASM_LEVEL is 'cont'.
+contig_level_grey <- "#666666"
+
+# Map each value to a colour. `breaks` holds three ascending boundaries and
+# `palette` the four colours from the lowest interval to the highest. Every
+# interval is closed on the left, so no value can fall into a gap between
+# two tiers (hand-written ranges such as 10 to 99.99999 would leave one).
+# NA values yield NA.
+tier_colour <- function(value, breaks, palette = worst_to_best) {
+  palette[findInterval(value, breaks) + 1L]
+}
+
+# CSS shared by every coloured cell. Extra properties passed through `...`
+# (e.g. font.size, width) are placed before the background colour.
+tile_style <- function(colour, ...) {
+  style(display = "block",
+        padding = "0 4px",
+        `border-radius` = "3px",
+        ...,
+        `background-color` = colour)
+}
+
+# Formatter for a column where larger values are better: values below
+# breaks[1] are red, then amber, green, and purple from breaks[3] upwards.
+tier_formatter <- function(breaks) {
+  formatter("span", style = function(x) tile_style(tier_colour(x, breaks)))
+}
+
+# Formatter for a legend column: the colour is chosen by matching the cell
+# text; any label other than the three given ones is shown in purple.
+legend_formatter <- function(red_label, amber_label, green_label, width) {
+  formatter("span", style = function(x) {
+    colour <- ifelse(x == red_label, tier_red,
+                     ifelse(x == amber_label, tier_amber,
+                            ifelse(x == green_label, tier_green, tier_purple)))
+    tile_style(colour, "font.size" = "12px", "width" = width)
+  })
+}
+
+full_table_of_stats <- read.table(
+  file = snakemake@input[[1]], sep = "", header = TRUE,
+  row.names = NULL, check.names = FALSE
+)
+
+# Gaps per Gb = gap count / genome size in Gb. Est_Size_Mb is in Mb, so the
+# size in Gb is Est_Size_Mb / 1000 (the count must be divided by the size,
+# not multiplied by it). as.integer() truncates towards zero.
+full_table_of_stats$Gaps_per_Gb <- as.integer(
+  full_table_of_stats$Gaps / (full_table_of_stats$Est_Size_Mb / 1000)
+)
+
+heatmap_stats <- full_table_of_stats %>%
+  select(c('ASM_ID', 'ASM_LEVEL',
+           'Gaps_per_Gb', 'Scaff_NG50_Mb',
+           'Cont_NG50_Mb', 'QV',
+           'Completeness', 'Comp_Single_BUSCOs_%'))
+
+# Everything printed from here until sink(file = NULL) goes to the output
+# file; the tables are emitted by auto-printing at top level.
+sink(file = snakemake@output[[1]])
+
+format_table(
+  heatmap_stats,
+  list(
+    # Fewer gaps are better, so the palette is reversed. Gaps_per_Gb is an
+    # integer: <= 200 purple, 201-1000 green, 1001-10000 amber, above red.
+    Gaps_per_Gb = formatter(
+      "span",
+      style = ~ tile_style(
+        ifelse(ASM_LEVEL == 'cont', contig_level_grey,
+               tier_colour(Gaps_per_Gb, c(201, 1001, 10001),
+                           rev(worst_to_best)))
+      )
+    ),
+    Scaff_NG50_Mb = tier_formatter(c(0.1, 10, 100)),
+    Cont_NG50_Mb = tier_formatter(c(0.01, 1, 10)),
+    Completeness = tier_formatter(c(80, 90, 95)),
+    # Same boundaries as Completeness, so exactly 95 is purple in both.
+    `Comp_Single_BUSCOs_%` = tier_formatter(c(80, 90, 95)),
+    QV = tier_formatter(c(35, 40, 50))
+  ),
+  # One alignment entry per column: ASM_ID left, the rest centred.
+  align = c("l", rep("c", ncol(heatmap_stats) - 1L))
+)
+
+cat('<br>')
+cat("\n")
+cat('<br>')
+
+legend_table <- read.table(
+  file = snakemake@params[[1]], sep = "", header = TRUE,
+  row.names = NULL, check.names = FALSE
+)
+
+format_table(
+  legend_table,
+  list(
+    Gaps_per_Gb = legend_formatter(
+      '> 10000', '1000 - 10000', '200 - 1000', width = '150px'
+    ),
+    Scaff_NG50_Mb = legend_formatter(
+      '< 0.1Mbp', '0.1Mbp - 10Mbp', '10Mbp - 100Mbp', width = '150px'
+    ),
+    Cont_NG50_Mb = legend_formatter(
+      '< 0.01Mbp', '0.01Mbp - 1Mbp', '1Mbp - 10Mbp', width = '150px'
+    ),
+    QV = legend_formatter('< 35', '35 - 40', '40 - 50', width = '100px'),
+    Completeness = legend_formatter(
+      '< 80%', '80% - 90%', '90% - 95%', width = '150px'
+    ),
+    `Comp_Single_BUSCOs_%` = legend_formatter(
+      '< 80%', '80% - 90%', '90% - 95%', width = '220px'
+    )
+  ),
+  align = rep("c", ncol(legend_table))
+)
+
+sink(file = NULL)
diff --git a/scripts/fullTable_heatmap_internalComparison.R b/scripts/fullTable_heatmap_internalComparison.R
new file mode 100644
index 0000000..e1d5ae7
--- /dev/null
+++ b/scripts/fullTable_heatmap_internalComparison.R
@@ -0,0 +1,113 @@
+suppressMessages(library(dplyr))
+suppressMessages(library(formattable))
+
+# Writes two HTML tables to snakemake@output[[1]]:
+#   1. assembly statistics with a per-column colour gradient, so that the
+#      assemblies can be compared with each other;
+#   2. a legend, read from snakemake@params[[1]], marking which end of each
+#      gradient ('Max' or 'Min') gets the stronger colour.
+# snakemake@input[[1]] is the whitespace-delimited table of statistics.
+
+customBlue_max <- "#2e96ff"
+customBlue_min <- "#dcecfc"
+customGray_max <- "#8c8c8c"
+customGray_min <- "#e6e6e6"
+
+# Columns grouped by colour scheme and listed in display order, so each
+# column name is written once and shared by the selection, the heatmap
+# formatters and the legend formatters.
+id_columns <- c("ASM_ID", "ASM_LEVEL")
+# Grey gradient; color_tile() gives the *_max colour to the largest value.
+gray_columns <- c("Bases_Mb", "Het_%", "GC_%")
+# Blue gradient with the *_max colour on the smallest value.
+low_is_strong_columns <- c("Gaps_per_Gb", "Scaff", "Cont")
+# Blue gradient with the *_max colour on the largest value.
+high_is_strong_columns <- c(
+  "Longest_Scaff_Mb", "Scaff_NG50_Mb", "Scaff_NG95_Mb",
+  "Longest_Cont_Mb", "Cont_NG50_Mb", "Cont_NG95_Mb",
+  "QV", "Completeness",
+  "Comp_BUSCOs_%", "Comp_Single_BUSCOs_%"
+)
+
+# Build a named list holding one freshly created formatter per column.
+# `make_formatter` is a zero-argument function that returns a formatter.
+formatters_for <- function(columns, make_formatter) {
+  formatters <- lapply(columns, function(column) make_formatter())
+  names(formatters) <- columns
+  formatters
+}
+
+# Legend cell: `highlight` where the cell text equals `label`, `other`
+# for every other cell of the column.
+legend_tile <- function(label, highlight, other) {
+  formatter("span", style = function(x) {
+    style(display = "block",
+          padding = "0 4px",
+          `border-radius` = "3px",
+          `background-color` = ifelse(x == label, highlight, other))
+  })
+}
+
+full_table_of_stats <- read.table(
+  file = snakemake@input[[1]], sep = "", header = TRUE,
+  row.names = NULL, check.names = FALSE
+)
+
+# Gaps per Gb = gap count / genome size in Gb. Est_Size_Mb is in Mb, so the
+# size in Gb is Est_Size_Mb / 1000 (the count must be divided by the size,
+# not multiplied by it). as.integer() truncates towards zero.
+full_table_of_stats$Gaps_per_Gb <- as.integer(
+  full_table_of_stats$Gaps / (full_table_of_stats$Est_Size_Mb / 1000)
+)
+
+comparison_stats <- full_table_of_stats[
+  c(id_columns, gray_columns, low_is_strong_columns, high_is_strong_columns)
+]
+
+# Everything printed from here until sink(file = NULL) goes to the output
+# file; the tables are emitted by auto-printing at top level.
+sink(file = snakemake@output[[1]])
+
+format_table(
+  comparison_stats,
+  c(
+    formatters_for(gray_columns, function() {
+      color_tile(customGray_min, customGray_max)
+    }),
+    formatters_for(low_is_strong_columns, function() {
+      color_tile(customBlue_max, customBlue_min)
+    }),
+    formatters_for(high_is_strong_columns, function() {
+      color_tile(customBlue_min, customBlue_max)
+    })
+  ),
+  # One alignment entry per column: ASM_ID left, the rest centred.
+  align = c("l", rep("c", ncol(comparison_stats) - 1L))
+)
+
+cat('<br>')
+cat("\n")
+cat('<br>')
+
+legend_table <- read.table(
+  file = snakemake@params[[1]], sep = "", header = TRUE,
+  row.names = NULL, check.names = FALSE
+)
+
+format_table(
+  legend_table,
+  c(
+    formatters_for(gray_columns, function() {
+      legend_tile('Max', customGray_max, customGray_min)
+    }),
+    formatters_for(low_is_strong_columns, function() {
+      legend_tile('Min', customBlue_max, customBlue_min)
+    }),
+    formatters_for(high_is_strong_columns, function() {
+      legend_tile('Max', customBlue_max, customBlue_min)
+    })
+  ),
+  align = rep("c", ncol(legend_table))
+)
+
+sink(file = NULL)
diff --git a/scripts/internalComparison_legend.tsv b/scripts/internalComparison_legend.tsv
new file mode 100644
index 0000000..b8e16cc
--- /dev/null
+++ b/scripts/internalComparison_legend.tsv
@@ -0,0 +1,3 @@
+Bases_Mb   	Het_%    	GC_%  		Gaps_per_Gb  	 Scaff    	Cont   		Longest_Scaff_Mb 	Scaff_NG50_Mb 	Scaff_NG95_Mb   Longest_Cont_Mb 	Cont_NG50_Mb    Cont_NG95_Mb    QV    		Completeness    Comp_BUSCOs_%    Comp_Single_BUSCOs_%
+'Max'		'Max'		'Max'		'Max'		'Max'		'Max'		'Max'			'Max'		'Max'		'Max'			'Max'		'Max'		'Max'		'Max'		'Max'		'Max'
+'Min'		'Min'		'Min'		'Min'		'Min'		'Min'		'Min'			'Min'		'Min'		'Min'			'Min'		'Min'		'Min'		'Min'		'Min'		'Min'
diff --git a/scripts/tableOnSamePage.css b/scripts/tableOnSamePage.css
new file mode 100644
index 0000000..ad4e7ad
--- /dev/null
+++ b/scripts/tableOnSamePage.css
@@ -0,0 +1,3 @@
+<style>
+.main-container { width: 1200px; max-width:2800px;} /* sets .main-container to 1200px wide (max-width 2800px); presumably widens the report page so the tables fit - confirm against the report template */
+</style>
-- 
GitLab