From 25d6b9be33a2bb450aec37fdf74242d08f0cecfd Mon Sep 17 00:00:00 2001
From: james94 <james94@mi.fu-berlin.de>
Date: Fri, 5 Aug 2022 14:45:16 +0200
Subject: [PATCH] rules/build_hifi.smk update hifi build

---
 scripts/addFullTableForReport.py              |  23 +--
 scripts/colouredHeatmap_legend.tsv            |   5 +
 scripts/fullTable_heatmap_external.R          | 132 +++++++++++++++++
 .../fullTable_heatmap_internalComparison.R    | 140 ++++++++++++++++++
 scripts/internalComparison_legend.tsv         |   3 +
 scripts/tableOnSamePage.css                   |   3 +
 6 files changed, 296 insertions(+), 10 deletions(-)
 create mode 100644 scripts/colouredHeatmap_legend.tsv
 create mode 100644 scripts/fullTable_heatmap_external.R
 create mode 100644 scripts/fullTable_heatmap_internalComparison.R
 create mode 100644 scripts/internalComparison_legend.tsv
 create mode 100644 scripts/tableOnSamePage.css

diff --git a/scripts/addFullTableForReport.py b/scripts/addFullTableForReport.py
index bb37771..6ac763b 100644
--- a/scripts/addFullTableForReport.py
+++ b/scripts/addFullTableForReport.py
@@ -2,8 +2,8 @@ import pandas as pd
 from tabulate import tabulate
 samples=pd.read_csv(snakemake.input.results, dtype=str, index_col=0, delim_whitespace=True, skip_blank_lines=True)
 # samples=samples.reset_index
-turn2FloatAndMb=['Bases_Mb','Est_Size_Mb','Longest_Scaff_Mb','Scaff_N50_Mb','Scaff_NG50_Mb','Scaff_N95_Mb','Scaff_NG95_Mb','Longest_Cont','Cont_N50_Mb','Cont_NG50_Mb','Cont_N95_Mb','Cont_NG95_Mb']
-roundDecimals=['Comp_BUSCOs_%','Comp_Single_BUSCOs_%','Het_%','GC_%','QV','Completeness','Bases_Mb','Est_Size_Mb','Longest_Scaff_Mb','Scaff_N50_Mb','Scaff_NG50_Mb','Scaff_N95_Mb','Scaff_NG95_Mb','Longest_Cont','Cont_N50_Mb','Cont_NG50_Mb','Cont_N95_Mb','Cont_NG95_Mb']
+turn2FloatAndMb=['Bases_Mb','Est_Size_Mb','Longest_Scaff_Mb','Scaff_N50_Mb','Scaff_NG50_Mb','Scaff_N95_Mb','Scaff_NG95_Mb','Longest_Cont_Mb','Cont_N50_Mb','Cont_NG50_Mb','Cont_N95_Mb','Cont_NG95_Mb']
+roundDecimals=['Comp_BUSCOs_%','Comp_Single_BUSCOs_%','Het_%','GC_%','QV','Completeness','Bases_Mb','Est_Size_Mb','Longest_Scaff_Mb','Scaff_N50_Mb','Scaff_NG50_Mb','Scaff_N95_Mb','Scaff_NG95_Mb','Longest_Cont_Mb','Cont_N50_Mb','Cont_NG50_Mb','Cont_N95_Mb','Cont_NG95_Mb']
 print('this is samples',samples)
 sampleTransposed=samples.T
 print('this is sampleTransposed',sampleTransposed)
@@ -13,11 +13,14 @@ for i in range(0,4):
 	sampleTransposed[roundDecimals[i]]=sampleTransposed[roundDecimals[i]].str.replace('%','')
 for roundHeader in roundDecimals:
 	sampleTransposed[roundHeader]=sampleTransposed[roundHeader].astype(float).round(2)
-with open(snakemake.output[0], "w") as out:
-	print("", file=out)
-	print("\\blandscape", file=out)
-	print("", file=out)
-	print("\\tiny", file=out)
-	print(tabulate(sampleTransposed.rename_axis('ASM_ID'), headers='keys',tablefmt="pipe", showindex=True), file=out)
-	print("\\elandscape", file=out)
-	print("", file=out)
+with open(snakemake.output[0], "w") as out_markdown:
+	print("", file=out_markdown)
+	print("\\blandscape", file=out_markdown)
+	print("", file=out_markdown)
+	print("\\tiny", file=out_markdown)
+	print(tabulate(sampleTransposed.rename_axis('ASM_ID'), headers='keys',tablefmt="pipe", showindex=True), file=out_markdown)
+	print("\\elandscape", file=out_markdown)
+	print("", file=out_markdown)
+
+with open(snakemake.output[1], "w") as out_plain:
+	print(tabulate(sampleTransposed.rename_axis('ASM_ID'), headers='keys',tablefmt="plain", showindex=True), file=out_plain)
diff --git a/scripts/colouredHeatmap_legend.tsv b/scripts/colouredHeatmap_legend.tsv
new file mode 100644
index 0000000..526c788
--- /dev/null
+++ b/scripts/colouredHeatmap_legend.tsv
@@ -0,0 +1,5 @@
+Gaps_per_Gb Scaff_NG50_Mb Cont_NG50_Mb QV Completeness Comp_Single_BUSCOs_%
+'< 200' '> 100Mbp' '> 10Mbp' '> 50' '> 95%' '> 95%'
+'200 - 1000' '10Mbp - 100Mbp' '1Mbp - 10Mbp' '40 - 50' '90% - 95%' '90% - 95%'
+'1000 - 10000' '0.1Mbp - 10Mbp' '0.01Mbp - 1Mbp' '35 - 40' '80% - 90%' '80% - 90%'
+'> 10000' '< 0.1Mbp' '< 0.01Mbp' '< 35' '< 80%' '< 80%'
diff --git a/scripts/fullTable_heatmap_external.R b/scripts/fullTable_heatmap_external.R
new file mode 100644
index 0000000..3d251c8
--- /dev/null
+++ b/scripts/fullTable_heatmap_external.R
@@ -0,0 +1,132 @@
+suppressMessages(library(dplyr))
+suppressMessages(library(formattable))
+#suppressMessages(library(data.table))
+# Writes two HTML tables to snakemake@output[[1]]: selected assembly statistics
+# with threshold-based cell colours, followed by the matching colour legend.
+fullTableOfStats<-read.table(file = snakemake@input[[1]], sep = "", header = TRUE, row.names=NULL, check.names = FALSE)
+
+# Gaps per Gb = gap count divided by the estimated genome size in Gb
+# (Est_Size_Mb / 1000); as.integer() then drops the fractional part.
+fullTableOfStats$Gaps_per_Gb <- (fullTableOfStats$Gaps / (fullTableOfStats$Est_Size_Mb / 1000))
+fullTableOfStats$Gaps_per_Gb <- as.integer(fullTableOfStats$Gaps_per_Gb)
+
+# Cell background colours shared by the data table and the legend table.
+customPurple='#c699e8'
+customGreen='#8fc773'
+
+selectionOfStats_colouredHeatmap <- fullTableOfStats %>%
+  select(c('ASM_ID','ASM_LEVEL',
+           'Gaps_per_Gb', 'Scaff_NG50_Mb',
+           'Cont_NG50_Mb','QV',
+           'Completeness', 'Comp_Single_BUSCOs_%'))
+# Divert everything printed below into the output file until sink(file = NULL).
+sink(file = snakemake@output[[1]])
+format_table(selectionOfStats_colouredHeatmap,
+             align = c("l", rep("c", 7)),
+             list(Gaps_per_Gb = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(ASM_LEVEL == 'cont', "#666666",
+                                                                    ifelse(between(Gaps_per_Gb,1001, 10000), "#FFCC99",
+                                                                    ifelse(between(Gaps_per_Gb,201 , 1000), customGreen,
+                                                                    ifelse(Gaps_per_Gb <= 200, customPurple, "#FF9999")))))),
+                  Scaff_NG50_Mb = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(between(Scaff_NG50_Mb,0.1, 9.99999), "#FFCC99",
+                                                                    ifelse(between(Scaff_NG50_Mb,10, 99.99999), customGreen,
+                                                                    ifelse(Scaff_NG50_Mb >= 100, customPurple, "#FF9999"))))),
+                  Cont_NG50_Mb = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(between(Cont_NG50_Mb,0.01, 0.99999), "#FFCC99",
+                                                                    ifelse(between(Cont_NG50_Mb,1, 9.999999), customGreen,
+                                                                    ifelse(Cont_NG50_Mb >= 10, customPurple, "#FF9999"))))),
+                  Completeness = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(between(Completeness,80, 89.9999), "#FFCC99",
+                                                                    ifelse(between(Completeness,90, 94.99999), customGreen,
+                                                                    ifelse(Completeness >= 95, customPurple, "#FF9999"))))),
+                  `Comp_Single_BUSCOs_%` = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(between(`Comp_Single_BUSCOs_%`,80, 89.9999), "#FFCC99",
+                                                                    ifelse(between(`Comp_Single_BUSCOs_%`,90, 94.99999), customGreen,
+                                                                    ifelse(`Comp_Single_BUSCOs_%` >= 95, customPurple, "#FF9999"))))),
+                  QV = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(between(QV,35, 39.9999999999), "#FFCC99",
+                                                                    ifelse(between(QV,40, 49.999999999), customGreen,
+                                                                    ifelse(QV >= 50, customPurple, "#FF9999")))))))
+
+cat('<br>')
+cat("\n")
+cat('<br>')
+
+legendTable<-read.table(file = snakemake@params[[1]], sep = "", header = TRUE, row.names=NULL, check.names = FALSE)
+# Legend cells are coloured by matching their literal text from the legend file.
+format_table(legendTable,
+             align = rep("c", 6),
+             list(Gaps_per_Gb = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               "font.size" = "12px",
+                                               "width" = '150px',
+                                               `background-color` = ifelse(Gaps_per_Gb == '> 10000',"#FF9999",
+                                                                    ifelse(Gaps_per_Gb == '1000 - 10000',"#FFCC99",
+                                                                    ifelse(Gaps_per_Gb == '200 - 1000',customGreen, customPurple))))),
+                  Scaff_NG50_Mb = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               "font.size" = "12px",
+                                               "width" = '150px',
+                                               `background-color` = ifelse(Scaff_NG50_Mb == '< 0.1Mbp',"#FF9999",
+                                                                    ifelse(Scaff_NG50_Mb == '0.1Mbp - 10Mbp',"#FFCC99",
+                                                                    ifelse(Scaff_NG50_Mb == '10Mbp - 100Mbp',customGreen, customPurple))))),
+                  Cont_NG50_Mb = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               "font.size" = "12px",
+                                               "width" = '150px',
+                                               `background-color` = ifelse(Cont_NG50_Mb == '< 0.01Mbp',"#FF9999",
+                                                                    ifelse(Cont_NG50_Mb == '0.01Mbp - 1Mbp',"#FFCC99",
+                                                                    ifelse(Cont_NG50_Mb == '1Mbp - 10Mbp',customGreen, customPurple))))),
+                  QV = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               "font.size" = "12px",
+                                               "width" = '100px',
+                                               `background-color` = ifelse(QV == '< 35',"#FF9999",
+                                                                    ifelse(QV == '35 - 40',"#FFCC99",
+                                                                    ifelse(QV == '40 - 50',customGreen, customPurple))))),
+                  Completeness = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               "font.size" = "12px",
+                                               "width" = '150px',
+                                               `background-color` = ifelse(Completeness == '< 80%',"#FF9999",
+                                                                    ifelse(Completeness == '80% - 90%',"#FFCC99",
+                                                                    ifelse(Completeness == '90% - 95%',customGreen, customPurple))))),
+                  `Comp_Single_BUSCOs_%` = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               "font.size" = "12px",
+                                               "width" = '220px',
+                                               `background-color` = ifelse(`Comp_Single_BUSCOs_%` == '< 80%',"#FF9999",
+                                                                    ifelse(`Comp_Single_BUSCOs_%` == '80% - 90%',"#FFCC99",
+                                                                    ifelse(`Comp_Single_BUSCOs_%` == '90% - 95%',customGreen, customPurple)))))))
+sink(file = NULL)
diff --git a/scripts/fullTable_heatmap_internalComparison.R b/scripts/fullTable_heatmap_internalComparison.R
new file mode 100644
index 0000000..e1d5ae7
--- /dev/null
+++ b/scripts/fullTable_heatmap_internalComparison.R
@@ -0,0 +1,140 @@
+suppressMessages(library(dplyr))
+suppressMessages(library(formattable))
+# Writes the statistics table with per-column colour gradients, then a Max/Min legend.
+fullTableOfStats<-read.table(file = snakemake@input[[1]], sep = "", header = TRUE, row.names=NULL, check.names = FALSE)
+# Gaps per Gb = gap count / estimated genome size in Gb (Est_Size_Mb / 1000).
+fullTableOfStats$Gaps_per_Gb <- (fullTableOfStats$Gaps / (fullTableOfStats$Est_Size_Mb / 1000))
+fullTableOfStats$Gaps_per_Gb <- as.integer(fullTableOfStats$Gaps_per_Gb)
+
+
+
+selectionOfStats_internalComparisonHeatmap <- fullTableOfStats %>%
+  select(c('ASM_ID','ASM_LEVEL', 'Bases_Mb', 'Het_%','GC_%', 'Gaps_per_Gb', 'Scaff', 'Cont',
+           'Longest_Scaff_Mb','Scaff_NG50_Mb', 'Scaff_NG95_Mb',
+           'Longest_Cont_Mb', 'Cont_NG50_Mb','Cont_NG95_Mb',
+           'QV', 'Completeness',
+           'Comp_BUSCOs_%','Comp_Single_BUSCOs_%'))
+
+
+
+# Gradient end points passed to color_tile() and reused for the legend cells.
+customBlue_max = "#2e96ff"
+
+customBlue_min = "#dcecfc"
+
+
+customGray_max = "#8c8c8c"
+
+customGray_min = "#e6e6e6"
+
+sink(file = snakemake@output[[1]])
+# Gaps_per_Gb, Scaff and Cont get the colour arguments in reverse order.
+format_table(selectionOfStats_internalComparisonHeatmap,
+             align = c("l", rep("c", 17)),
+             list(Bases_Mb = color_tile(customGray_min, customGray_max),
+                  `Het_%` = color_tile(customGray_min, customGray_max),
+                  `GC_%` = color_tile(customGray_min, customGray_max),
+                  Gaps_per_Gb = color_tile(customBlue_max, customBlue_min),
+                  Scaff = color_tile(customBlue_max,customBlue_min),
+                  Cont = color_tile(customBlue_max,customBlue_min),
+                  Longest_Scaff_Mb = color_tile(customBlue_min, customBlue_max),
+                  Scaff_NG50_Mb = color_tile(customBlue_min, customBlue_max),
+                  Scaff_NG95_Mb = color_tile(customBlue_min, customBlue_max),
+                  Longest_Cont_Mb = color_tile(customBlue_min, customBlue_max),
+                  Cont_NG50_Mb = color_tile(customBlue_min, customBlue_max),
+                  Cont_NG95_Mb = color_tile(customBlue_min, customBlue_max),
+                  QV = color_tile(customBlue_min, customBlue_max),
+                  Completeness = color_tile(customBlue_min, customBlue_max),
+                  `Comp_BUSCOs_%` = color_tile(customBlue_min, customBlue_max),
+                  `Comp_Single_BUSCOs_%` = color_tile(customBlue_min, customBlue_max)))
+
+cat('<br>')
+cat("\n")
+cat('<br>')
+
+legendTable<-read.table(file = snakemake@params[[1]], sep = "", header = TRUE, row.names=NULL, check.names = FALSE)
+
+format_table(legendTable,
+             align = rep("c", 16),
+             list(Bases_Mb = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(Bases_Mb == 'Max',customGray_max, customGray_min))),
+                  `Het_%` = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(`Het_%` == 'Max',customGray_max, customGray_min))),
+                  `GC_%` = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(`GC_%` == 'Max',customGray_max, customGray_min))),
+                  Gaps_per_Gb = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(Gaps_per_Gb == 'Min',customBlue_max, customBlue_min))),
+                  Scaff = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(Scaff == 'Min',customBlue_max, customBlue_min))),
+                  Cont = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(Cont == 'Min',customBlue_max, customBlue_min))),
+                  Longest_Scaff_Mb = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(Longest_Scaff_Mb == 'Max',customBlue_max, customBlue_min))),
+                  Scaff_NG50_Mb = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(Scaff_NG50_Mb == 'Max',customBlue_max, customBlue_min))),
+                  Scaff_NG95_Mb = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(Scaff_NG95_Mb == 'Max',customBlue_max, customBlue_min))),
+                  Longest_Cont_Mb = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(Longest_Cont_Mb == 'Max',customBlue_max, customBlue_min))),
+                  Cont_NG50_Mb = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(Cont_NG50_Mb == 'Max',customBlue_max, customBlue_min))),
+                  Cont_NG95_Mb = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(Cont_NG95_Mb == 'Max',customBlue_max, customBlue_min))),
+                  QV = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(QV == 'Max',customBlue_max, customBlue_min))),
+                  Completeness = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(Completeness == 'Max',customBlue_max, customBlue_min))),
+                  `Comp_BUSCOs_%` = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(`Comp_BUSCOs_%` == 'Max',customBlue_max, customBlue_min))),
+                  `Comp_Single_BUSCOs_%` = formatter("span",
+                                style = ~style(display = "block",
+                                               padding = "0 4px",
+                                               `border-radius` = "3px",
+                                               `background-color` = ifelse(`Comp_Single_BUSCOs_%` == 'Max',customBlue_max, customBlue_min)))))
+
+sink(file = NULL)
diff --git a/scripts/internalComparison_legend.tsv b/scripts/internalComparison_legend.tsv
new file mode 100644
index 0000000..b8e16cc
--- /dev/null
+++ b/scripts/internalComparison_legend.tsv
@@ -0,0 +1,3 @@
+Bases_Mb Het_% GC_% Gaps_per_Gb Scaff Cont Longest_Scaff_Mb Scaff_NG50_Mb Scaff_NG95_Mb Longest_Cont_Mb Cont_NG50_Mb Cont_NG95_Mb QV Completeness Comp_BUSCOs_% Comp_Single_BUSCOs_%
+'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max'
+'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min'
diff --git a/scripts/tableOnSamePage.css b/scripts/tableOnSamePage.css
new file mode 100644
index 0000000..ad4e7ad
--- /dev/null
+++ b/scripts/tableOnSamePage.css
@@ -0,0 +1,3 @@
+<style>
+.main-container { width: 1200px; max-width:2800px;}
+</style>
-- 
GitLab