Skip to content
Snippets Groups Projects
Commit f3db9abb authored by james94's avatar james94
Browse files

folders

parent 4780d3e5
No related branches found
No related tags found
No related merge requests found
Showing
with 1057 additions and 10 deletions
# Snakemake profile defaults: each key mirrors the equivalent CLI flag.
cores: 64
dry-run: False
use-conda: True
# Seconds to wait for output files to appear on shared filesystems.
latency-wait: 60
rerun-incomplete: True
printshellcmds: False
keep-going: False
# Optional overrides, disabled by default:
#shadow-prefix: /srv/public/user/<user>/tempfileshere
#use-singularity: True
import numpy as np
from itertools import groupby
import json
import sys
import csv
import os
import pandas as pd
from tabulate import tabulate
#pd.set_option('display.float_format', lambda x: '%,g' % x)

# CLI layout (positional): argv[1] = assembly FASTA, argv[2] = file whose
# first line is the Genomescope2 genome-size estimate, argv[3] = assembly
# label, argv[4] = user-supplied estimated genome size (0 means "use the
# Genomescope2 value"), argv[5]/argv[6] = scaffold/contig report paths.
with open(sys.argv[2]) as f:
    ff = f.readlines()
# estSizeFile = open()
#
# size_bp = estSizeFile.readlines()
estSize = int(float(sys.argv[4]))
if estSize == 0:
    # Strip the trailing newline; size_bp stays a string in this branch
    # (it is int()-converted later in calculate_stats).
    size_bp = ff[0][:-1]
    print("No estimated Genome Size provided, using estimation from Genomescope2: ", size_bp)
else:
    print("Estimated Genome Size provided as:", estSize, " using this size for NG and LG stats")
    size_bp = estSize
def fasta_iter(fasta_file):
    """
    Takes a FASTA file, and produces a generator of Header and Sequences.
    This is a memory-efficient way of analyzing a FASTA file -- without
    reading the entire file into memory.

    Parameters
    ----------
    fasta_file : str
        The file location of the FASTA file

    Yields
    ------
    header: str
        The string contained in the header portion of the sequence record
        (everything after the '>')
    seq: str
        The sequence portion of the sequence record, upper-cased and with
        line breaks removed
    """
    # Context manager fixes the original's leaked file handle: the file is
    # now closed when the generator is exhausted or garbage-collected.
    with open(fasta_file) as fh:
        # groupby alternates between runs of header lines and runs of
        # sequence lines, so consuming the iterator pairwise walks records.
        fa_iter = (x[1] for x in groupby(fh, lambda line: line[0] == ">"))
        for header in fa_iter:
            # drop the ">"
            header = next(header)[1:].strip()
            # join all sequence lines to one.
            seq = "".join(s.upper().strip() for s in next(fa_iter))
            yield header, seq
def read_genome(fasta_file):
    """
    Takes a FASTA file, and produces 2 lists of sequence lengths. It also
    calculates the GC Content, since this is the only statistic that is not
    calculated based on sequence lengths.

    Parameters
    ----------
    fasta_file : str
        The file location of the FASTA file

    Returns
    -------
    contig_lens: list
        A list of lengths of all contigs in the genome.
    scaffold_lens: list
        A list of lengths of all scaffolds in the genome.
    gc_cont: float
        The percentage of total basepairs in the genome that are either G
        or C. Returns 0.0 for an empty/all-gap genome instead of raising
        ZeroDivisionError (the original crashed on that input).
    """
    gc = 0
    total_len = 0
    contig_lens = []
    scaffold_lens = []
    for _, seq in fasta_iter(fasta_file):
        scaffold_lens.append(len(seq))
        # Scaffolds are split into contigs at gap runs; the heuristic used
        # here treats any literal "NN" substring as a gap separator.
        if "NN" in seq:
            contig_list = seq.split("NN")
        else:
            contig_list = [seq]
        for contig in contig_list:
            if len(contig):
                gc += contig.count('G') + contig.count('C')
                total_len += len(contig)
                contig_lens.append(len(contig))
    # Guard against division by zero when no non-gap sequence was seen.
    gc_cont = (gc / total_len) * 100 if total_len else 0.0
    return contig_lens, scaffold_lens, gc_cont
# Percent thresholds at which the Nxx/NGxx/Lxx/LGxx series are reported.
LEVELS = (5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60,
          65, 70, 75, 80, 85, 90, 95, 96, 97, 98, 99, 100)

def calculate_stats(seq_lens, gc_cont, est_size=None):
    """
    Compute assembly summary statistics from a list of sequence lengths.

    Parameters
    ----------
    seq_lens : list of int
        Lengths of all sequences (contigs or scaffolds); must be non-empty.
    gc_cont : float
        GC-content percentage, stored verbatim in the result.
    est_size : int or str, optional
        Estimated genome size in bp used for the NG/LG series. Defaults to
        the module-level ``size_bp`` parsed from the command line, so the
        existing two-argument callers keep their behaviour.

    Returns
    -------
    dict
        Insertion-ordered stats: 7 headline entries, then 24 Lxx, 24 Nxx,
        24 LGxx and 24 NGxx entries. Downstream report code slices this
        dict by position, so the insertion order must not change.

    Notes
    -----
    Replaces the original ``int(np.where(csum == csumn)[0])`` lookups,
    which raise/deprecate when the cumulative sum contains duplicate
    values (possible with zero-length entries), with ``np.argmax`` over
    the boolean mask — the first index reaching the threshold.
    """
    stats = {}
    seq_array = np.array(seq_lens)
    stats['sequence_count'] = seq_array.size
    stats['number_of_gaps'] = 0  # overwritten later by the caller
    stats['gc_content (%)'] = gc_cont
    sorted_lens = seq_array[np.argsort(-seq_array)]  # descending lengths
    stats['longest (bp)'] = int(sorted_lens[0])
    stats['shortest (bp)'] = int(sorted_lens[-1])
    stats['total_bps (bp)'] = int(np.sum(sorted_lens))
    stats['estimated_size (bp)'] = int(size_bp) if est_size is None else int(est_size)
    csum = np.cumsum(sorted_lens)
    last = seq_array.size - 1

    def first_reaching(threshold):
        # Index of the first cumulative sum >= threshold; falls back to the
        # final sequence when the assembly never reaches the threshold
        # (possible for NG/LG when the estimate exceeds the assembly size).
        reached = csum >= threshold
        return int(np.argmax(reached)) if reached.any() else last

    # Lxx: how many sequences are needed to cover xx% of the assembly.
    for level in LEVELS:
        nx = int(stats['total_bps (bp)'] * (level / 100))
        stats['L' + str(level)] = first_reaching(nx) + 1
    # Nxx: length of the sequence at which xx% of the assembly is covered.
    for level in LEVELS:
        nx = int(stats['total_bps (bp)'] * (level / 100))
        stats['N' + str(level)] = int(sorted_lens[first_reaching(nx)])
    # LGxx / NGxx: as above, but relative to the estimated genome size.
    for level in LEVELS:
        ngx = int(stats['estimated_size (bp)'] * (level / 100))
        stats['LG' + str(level)] = first_reaching(ngx) + 1
    for level in LEVELS:
        ngx = int(stats['estimated_size (bp)'] * (level / 100))
        stats['NG' + str(level)] = int(sorted_lens[first_reaching(ngx)])
    return stats
if __name__ == "__main__":
    # Build formatted scaffold- and contig-level report tables and write
    # them to the two output paths given on the command line.
    # print(size_bp)
    # print(type(size_bp))
    naming = sys.argv[3]
    infilename = sys.argv[1]
    contig_lens, scaffold_lens, gc_cont = read_genome(infilename)
    # contig_stats = calculate_stats(contig_lens, gc_cont)
    scaffold_stats = calculate_stats(scaffold_lens, gc_cont)
    rounded_gc = round(gc_cont, 2)
    # print("this is the rounded_gc: ", rounded_gc)
    contig_stats = calculate_stats(contig_lens, gc_cont)
    # Splitting scaffolds at gaps produces one extra contig per gap, so the
    # difference in sequence counts equals the number of gaps.
    gaps=contig_stats.get('sequence_count') - scaffold_stats.get('sequence_count')
    scaffold_stats['number_of_gaps'] = gaps
    contig_stats['number_of_gaps'] = gaps
    # print("this is the scaffold_stats: ", scaffold_stats)
    # print(scaffold_stats)
    # df_scaffold_all= pd.DataFrame.from_dict(scaffold_stats, orient= 'index')
    # print(df_scaffold_all)
    # df2 = pd.DataFrame([scaffold_stats]).T
    # print(df2)
    # s = pd.Series(scaffold_stats, name=naming)
    # s.index.name = 'Assembly:'
    # s.reset_index()
    # print(s)
    # NOTE: the stats dict is insertion-ordered — rows 0-6 are the headline
    # values, 7-30 Lxx, 31-54 Nxx, 55-78 LGxx, 79-102 NGxx.  Every iloc
    # slice below depends on that layout.
    scaff_seq=pd.DataFrame(scaffold_stats.items(), columns=['Assembly:', naming])
    # print("this is the scaff_seq: ", scaff_seq)
    df_scaffold_top=scaff_seq.iloc[0:7,0:2]
    # print("this is GC CONTENT", gc_cont)
    df_scaffold_top[naming]=df_scaffold_top[naming].astype('int64').apply('{:,}'.format)
    # print("this is the top: ", df_scaffold_top)
    # Row 2 is the GC percentage — restore the rounded float after the
    # int64 comma-formatting above truncated it.
    df_scaffold_top[naming][2] = rounded_gc
    # print("this is df_scaffold_top[naming][2]", df_scaffold_top[naming][2])
    # print("this is the top: ", df_scaffold_top)
    # df_scaffold_top=df_scaffold_top.style.hide_index()
    # df_scaffold_top[naming].round(decimals=0)
    # df_contig_all=pd.DataFrame(data=contig_stats)
    # df_contig_top=df_contig_all.iloc[0:6,0:2]
    df_scaffold_Nxx=pd.DataFrame(scaffold_stats.items(), columns=['Nxx Level (%)', 'Length of Nxx Scaffold (bp)'])
    df_scaffold_Nxx=df_scaffold_Nxx.iloc[31:55,0:2]
    df_scaffold_Nxx=df_scaffold_Nxx.reset_index()
    # print(df_scaffold_Nxx)
    df_scaffold_NGxx=pd.DataFrame(scaffold_stats.items(), columns=['NGxx Level (%)', 'Length of NGxx Scaffold (bp)'])
    df_scaffold_NGxx=df_scaffold_NGxx.iloc[79:104,0:2]
    df_scaffold_NGxx=df_scaffold_NGxx.reset_index()
    # print(df_scaffold_NGxx)
    # Nxx and NGxx side by side; drop the two helper index columns.
    df_scaffold_N_NG=pd.concat([df_scaffold_Nxx,df_scaffold_NGxx], axis=1)
    df_scaffold_N_NG=df_scaffold_N_NG.drop(df_scaffold_N_NG.columns[[0,3]], axis = 1)
    df_scaffold_N_NG['Length of Nxx Scaffold (bp)']=df_scaffold_N_NG['Length of Nxx Scaffold (bp)'].astype('int64').apply('{:,}'.format)
    df_scaffold_N_NG['Length of NGxx Scaffold (bp)']=df_scaffold_N_NG['Length of NGxx Scaffold (bp)'].astype('int64').apply('{:,}'.format)
    # df_scaffold_N_NG=df_scaffold_N_NG.style.hide_index()
    df_scaffold_Lxx=pd.DataFrame(scaffold_stats.items(), columns=['Lxx Level (%)', 'Count of scaffolds (for Lxx Level)'])
    df_scaffold_Lxx=df_scaffold_Lxx.iloc[7:31,0:2]
    df_scaffold_Lxx=df_scaffold_Lxx.reset_index()
    # print(df_scaffold_Nxx)
    df_scaffold_LGxx=pd.DataFrame(scaffold_stats.items(), columns=['LGxx Level (%)', 'Count of scaffolds (for LGxx Level)'])
    df_scaffold_LGxx=df_scaffold_LGxx.iloc[55:79,0:2]
    df_scaffold_LGxx=df_scaffold_LGxx.reset_index()
    # print(df_scaffold_NGxx)
    df_scaffold_L_LG=pd.concat([df_scaffold_Lxx,df_scaffold_LGxx], axis=1)
    df_scaffold_L_LG=df_scaffold_L_LG.drop(df_scaffold_L_LG.columns[[0,3]], axis = 1)
    df_scaffold_L_LG['Count of scaffolds (for Lxx Level)']=df_scaffold_L_LG['Count of scaffolds (for Lxx Level)'].astype('int64').apply('{:,}'.format)
    df_scaffold_L_LG['Count of scaffolds (for LGxx Level)']=df_scaffold_L_LG['Count of scaffolds (for LGxx Level)'].astype('int64').apply('{:,}'.format)
    ################################################################################################################
    # Same table construction as above, but for the contig-level stats.
    contig_seq=pd.DataFrame(contig_stats.items(), columns=['Assembly:', naming])
    df_contig_top=contig_seq.iloc[0:7,0:2]
    df_contig_top[naming]=df_contig_top[naming].astype('int64').apply('{:,}'.format)
    df_contig_top[naming][2] = rounded_gc
    # df_scaffold_top=df_scaffold_top.style.hide_index()
    # df_scaffold_top[naming].round(decimals=0)
    # df_contig_all=pd.DataFrame(data=contig_stats)
    # df_contig_top=df_contig_all.iloc[0:6,0:2]
    df_contig_Nxx=pd.DataFrame(contig_stats.items(), columns=['Nxx Level (%)', 'Length of Nxx contig (bp)'])
    df_contig_Nxx=df_contig_Nxx.iloc[31:55,0:2]
    df_contig_Nxx=df_contig_Nxx.reset_index()
    # print(df_scaffold_Nxx)
    df_contig_NGxx=pd.DataFrame(contig_stats.items(), columns=['NGxx Level (%)', 'Length of NGxx contig (bp)'])
    df_contig_NGxx=df_contig_NGxx.iloc[79:104,0:2]
    df_contig_NGxx=df_contig_NGxx.reset_index()
    # print(df_scaffold_NGxx)
    df_contig_N_NG=pd.concat([df_contig_Nxx,df_contig_NGxx], axis=1)
    df_contig_N_NG=df_contig_N_NG.drop(df_contig_N_NG.columns[[0,3]], axis = 1)
    df_contig_N_NG['Length of Nxx contig (bp)']=df_contig_N_NG['Length of Nxx contig (bp)'].astype('int64').apply('{:,}'.format)
    df_contig_N_NG['Length of NGxx contig (bp)']=df_contig_N_NG['Length of NGxx contig (bp)'].astype('int64').apply('{:,}'.format)
    # df_scaffold_N_NG=df_scaffold_N_NG.style.hide_index()
    df_contig_Lxx=pd.DataFrame(contig_stats.items(), columns=['Lxx Level (%)', 'Count of contig (for Lxx Level)'])
    df_contig_Lxx=df_contig_Lxx.iloc[7:31,0:2]
    df_contig_Lxx=df_contig_Lxx.reset_index()
    # print(df_scaffold_Nxx)
    df_contig_LGxx=pd.DataFrame(contig_stats.items(), columns=['LGxx Level (%)', 'Count of contig (for LGxx Level)'])
    df_contig_LGxx=df_contig_LGxx.iloc[55:79,0:2]
    df_contig_LGxx=df_contig_LGxx.reset_index()
    # print(df_scaffold_NGxx)
    df_contig_L_LG=pd.concat([df_contig_Lxx,df_contig_LGxx], axis=1)
    df_contig_L_LG=df_contig_L_LG.drop(df_contig_L_LG.columns[[0,3]], axis = 1)
    df_contig_L_LG['Count of contig (for Lxx Level)']=df_contig_L_LG['Count of contig (for Lxx Level)'].astype('int64').apply('{:,}'.format)
    df_contig_L_LG['Count of contig (for LGxx Level)']=df_contig_L_LG['Count of contig (for LGxx Level)'].astype('int64').apply('{:,}'.format)
    # df_scaffold_L_LG=df_scaffold_L_LG.style.hide_index()
    # print(df_contig_top)
    # print(scaffold_stats)
    # stat_output = {'Contig Stats': contig_stats,
    # 'Scaffold Stats': scaffold_stats}
    # print(json.dumps(stat_output, indent=2, sort_keys=False))
    # scaffold_out = {scaffold_stats}
    # contig_out = {contig_stats}
    # print(json.dumps(scaffold_stats, indent=2, sort_keys=False))
    # print(json.dumps(contig_stats, indent=2, sort_keys=False))
    # dict_writer = csv.DictWriter(stat_output, fieldnames=None, delimiter='\t')
    # print(dict_writer)
    # df_raw = pd.DataFrame.from_dict(data, orient='index')
    # print(df_raw)
    # with open('statdata.txt', 'w') as outfile:
    # json.dump(scaffold_stats, outfile)
    # with open('contigdata.txt', 'w') as out2file:
    # json.dump(contig_stats, out2file)
    # scaff = csv.writer(open(naming + "_scaffold_stats.tsv", "w"), delimiter='\t')
    # for key, val in scaffold_stats.items():
    # scaff.writerow([key, val])
    #
    # contig = csv.writer(open(naming + "_contig_stats.tsv", "w"), delimiter='\t')
    # for key, val in contig_stats.items():
    #
    # Scaffold-level report -> argv[5].
    with open(sys.argv[5], 'w') as outputfile:
        # # print('#' + libraryName, file=outputfile)
        # # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile)
        # # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile)
        # # print("Successfully Processed reads (Paired): " + successfull + " ( "+str(successfull_perc)+"%)", file=outputfile)
        print(df_scaffold_top.to_string(index=False), file=outputfile)
        print("", file=outputfile)
        print(df_scaffold_N_NG.to_string(index=False), file=outputfile)
        print("", file=outputfile)
        print(df_scaffold_L_LG.to_string(index=False), file=outputfile)
    # Contig-level report -> argv[6].
    with open(sys.argv[6], 'w') as outputfile2:
        # # print('#' + libraryName, file=outputfile)
        # # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile)
        # # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile)
        # # print("Successfully Processed reads (Paired): " + successfull + " ( "+str(successfull_perc)+"%)", file=outputfile)
        print(df_contig_top.to_string(index=False), file=outputfile2)
        print("", file=outputfile2)
        print(df_contig_N_NG.to_string(index=False), file=outputfile2)
        print("", file=outputfile2)
        print(df_contig_L_LG.to_string(index=False), file=outputfile2)
# with open(sys.argv[4], 'w') as outputRst:
# print(tabulate(df_scaffold_top, headers='keys',tablefmt="rst", showindex=False), file=outputRst)
# print("", file=outputRst)
# print(tabulate(df_scaffold_N_NG, headers='keys',tablefmt="rst", showindex=False), file=outputRst)
# print("", file=outputRst)
# print(tabulate(df_scaffold_L_LG, headers='keys',tablefmt="rst", showindex=False), file=outputRst)
# print("", file=outputRst)
# #
# with open(sys.argv[5], 'w') as outputRst2:
# print(tabulate(df_contig_top, headers='keys',tablefmt="rst", showindex=False), file=outputRst2)
# print("", file=outputRst2)
# print(tabulate(df_contig_N_NG, headers='keys',tablefmt="rst", showindex=False), file=outputRst2)
# print("", file=outputRst2)
# print(tabulate(df_contig_L_LG, headers='keys',tablefmt="rst", showindex=False), file=outputRst2)
# print("", file=outputRst2)
# with open(sys.argv[4], 'w') as outputRst:
# print(tabulate(df_scaffold_top, headers='keys',tablefmt="pipe", showindex=False), file=outputRst)
# print("", file=outputRst)
# print(tabulate(df_scaffold_N_NG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst)
# print("", file=outputRst)
# print(tabulate(df_scaffold_L_LG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst)
# print("", file=outputRst)
# #
# with open(sys.argv[5], 'w') as outputRst2:
# print(tabulate(df_contig_top, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2)
# print("", file=outputRst2)
# print(tabulate(df_contig_N_NG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2)
# print("", file=outputRst2)
# print(tabulate(df_contig_L_LG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2)
# print("", file=outputRst2)
# list_of_dfs=[df_scaffold_top,df_scaffold_N_NG,df_scaffold_L_LG]
# for df in list_of_dfs:
# with open('all_dfs.tsv','a') as f:
# df.to_csv(f)
# f.write("\n")
# with open("testok.tsv", 'w') as outputfile:
# # print('#' + libraryName, file=outputfile)
# # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile)
# # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile)
# # print("Successfully Processed reads (Paired): " + successfull + " ( "+str(successfull_perc)+"%)", file=outputfile)
# print(tabulate(loadinTop, headers='keys',tablefmt="rst", showindex=False), file=outputfile)
# # print('', file=outputfile)
# print(tabulate(result, headers='keys', tablefmt="psql", showindex=False), file=outputfile)
# print('', file=outputfile)
Gaps_per_Gb Scaff_NG50_Mb Cont_NG50_Mb QV Completeness Comp_Single_BUSCOs_%
'< 200' '> 100Mbp' '> 10Mbp' '> 50' '> 95%' '> 95%'
'200 - 1000' '10Mbp - 100Mbp' '1Mbp - 10Mbp' '40 - 50' '90% - 95%' '90% - 95%'
'1000 - 10000' '0.1Mbp - 10Mbp' '0.01Mbp - 1Mbp' '35 - 40' '80% - 90%' '80% - 90%'
'> 10000' '< 0.1Mbp' '< 0.01Mbp' '< 35' '< 80%' '< 80%'
# Render the key assembly statistics as a colour-coded HTML table
# (formattable) with a colour legend, written to the snakemake output
# file via sink().  Colour bands follow the legend table: purple = best,
# green, orange, red = worst; contig-level assemblies are greyed out for
# the gap statistic.
suppressMessages(library(dplyr))
suppressMessages(library(formattable))
#suppressMessages(library(data.table))
fullTableOfStats<-read.table(file = snakemake@input[[1]], sep = "", header = TRUE, row.names=NULL, check.names = FALSE)
# NOTE(review): dimensionally this computes gaps x gigabases, not gaps per
# gigabase (that would be Gaps / (Est_Size_Mb / 1000)) — confirm intent.
fullTableOfStats$Gaps_per_Gb <- ((fullTableOfStats$Gaps * fullTableOfStats$Est_Size_Mb)/1000)
fullTableOfStats$Gaps_per_Gb <- as.integer(fullTableOfStats$Gaps_per_Gb)
customPurple='#c699e8'
customGreen='#8fc773'
selectionOfStats_colouredHeatmap <- fullTableOfStats %>%
  select(c('ASM_ID','ASM_LEVEL',
           'Gaps_per_Gb', 'Scaff_NG50_Mb',
           'Cont_NG50_Mb','QV',
           'Completeness', 'Comp_Single_BUSCOs_%'))
# Redirect the HTML emitted by format_table into the output file.
sink(file = snakemake@output[[1]])
format_table(selectionOfStats_colouredHeatmap,
  align =c("l","c","c","c","c", "c", "c", "c", "c"),
  list(Gaps_per_Gb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        # Grey out contig-level assemblies (no gaps by definition).
                        `background-color` = ifelse(ASM_LEVEL == 'cont', "#666666",
                                             ifelse(between(Gaps_per_Gb,1001, 10000), "#FFCC99",
                                             ifelse(between(Gaps_per_Gb,201 , 1000), customGreen,
                                             ifelse(Gaps_per_Gb <= 200, customPurple, "#FF9999")))))),
       Scaff_NG50_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(between(Scaff_NG50_Mb,0.1, 9.99999), "#FFCC99",
                                             ifelse(between(Scaff_NG50_Mb,10, 99.99999), customGreen,
                                             ifelse(Scaff_NG50_Mb >= 100, customPurple, "#FF9999"))))),
       Cont_NG50_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(between(Cont_NG50_Mb,0.01, 0.99999), "#FFCC99",
                                             ifelse(between(Cont_NG50_Mb,1, 9.999999), customGreen,
                                             ifelse(Cont_NG50_Mb >= 10, customPurple, "#FF9999"))))),
       Completeness = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(between(Completeness,80, 89.9999), "#FFCC99",
                                             ifelse(between(Completeness,90, 94.99999), customGreen,
                                             ifelse(Completeness >= 95, customPurple, "#FF9999"))))),
       `Comp_Single_BUSCOs_%` = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(between(`Comp_Single_BUSCOs_%`,80, 89.9999), "#FFCC99",
                                             ifelse(between(`Comp_Single_BUSCOs_%`,90, 95), customGreen,
                                             ifelse(`Comp_Single_BUSCOs_%` >= 95, customPurple, "#FF9999"))))),
       QV = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(between(QV,35, 39.9999999999), "#FFCC99",
                                             ifelse(between(QV,40, 49.999999999), customGreen,
                                             ifelse(QV >= 50, customPurple, "#FF9999")))))))
cat('<br>')
cat("\n")
cat('<br>')
# Legend table: maps each colour band back to its threshold range.
legendTable<-read.table(file = snakemake@params[[1]], sep = "", header = TRUE, row.names=NULL, check.names = FALSE)
format_table(legendTable,
  align =c("c","c","c", "c", "c", "c", "c"),
  list(Gaps_per_Gb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        "font.size" = "12px",
                        "width" = '150px',
                        `background-color` = ifelse(Gaps_per_Gb == '> 10000',"#FF9999",
                                             ifelse(Gaps_per_Gb == '1000 - 10000',"#FFCC99",
                                             ifelse(Gaps_per_Gb == '200 - 1000',customGreen, customPurple))))),
       Scaff_NG50_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        "font.size" = "12px",
                        "width" = '150px',
                        `background-color` = ifelse(Scaff_NG50_Mb == '< 0.1Mbp',"#FF9999",
                                             ifelse(Scaff_NG50_Mb == '0.1Mbp - 10Mbp',"#FFCC99",
                                             ifelse(Scaff_NG50_Mb == '10Mbp - 100Mbp',customGreen, customPurple))))),
       Cont_NG50_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        "font.size" = "12px",
                        "width" = '150px',
                        `background-color` = ifelse(Cont_NG50_Mb == '< 0.01Mbp',"#FF9999",
                                             ifelse(Cont_NG50_Mb == '0.01Mbp - 1Mbp',"#FFCC99",
                                             ifelse(Cont_NG50_Mb == '1Mbp - 10Mbp',customGreen, customPurple))))),
       QV = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        "font.size" = "12px",
                        "width" = '100px',
                        `background-color` = ifelse(QV == '< 35',"#FF9999",
                                             ifelse(QV == '35 - 40',"#FFCC99",
                                             ifelse(QV == '40 - 50',customGreen, customPurple))))),
       Completeness = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        "font.size" = "12px",
                        "width" = '150px',
                        `background-color` = ifelse(Completeness == '< 80%',"#FF9999",
                                             ifelse(Completeness == '80% - 90%',"#FFCC99",
                                             ifelse(Completeness == '90% - 95%',customGreen, customPurple))))),
       `Comp_Single_BUSCOs_%` = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        "font.size" = "12px",
                        "width" = '220px',
                        `background-color` = ifelse(`Comp_Single_BUSCOs_%` == '< 80%',"#FF9999",
                                             ifelse(`Comp_Single_BUSCOs_%` == '80% - 90%',"#FFCC99",
                                             ifelse(`Comp_Single_BUSCOs_%` == '90% - 95%',customGreen, customPurple)))))))
sink(file = NULL)
# Render the full statistics table as a within-column colour-gradient
# heatmap (color_tile min..max per column) for comparing assemblies
# against each other, plus a Max/Min legend, written via sink().
suppressMessages(library(dplyr))
suppressMessages(library(formattable))
fullTableOfStats<-read.table(file = snakemake@input[[1]], sep = "", header = TRUE, row.names=NULL, check.names = FALSE)
# NOTE(review): dimensionally this computes gaps x gigabases, not gaps per
# gigabase (that would be Gaps / (Est_Size_Mb / 1000)) — confirm intent.
fullTableOfStats$Gaps_per_Gb <- ((fullTableOfStats$Gaps * fullTableOfStats$Est_Size_Mb)/1000)
fullTableOfStats$Gaps_per_Gb <- as.integer(fullTableOfStats$Gaps_per_Gb)
selectionOfStats_internalComparisonHeatmap <- fullTableOfStats %>%
  select(c('ASM_ID','ASM_LEVEL', 'Bases_Mb', 'Het_%','GC_%', 'Gaps_per_Gb', 'Scaff', 'Cont',
           'Longest_Scaff_Mb','Scaff_NG50_Mb', 'Scaff_NG95_Mb',
           'Longest_Cont_Mb', 'Cont_NG50_Mb','Cont_NG95_Mb',
           'QV', 'Completeness',
           'Comp_BUSCOs_%','Comp_Single_BUSCOs_%'))
customBlue_max = "#2e96ff"
customBlue_min = "#dcecfc"
customGray_max = "#8c8c8c"
customGray_min = "#e6e6e6"
sink(file = snakemake@output[[1]])
# Gradient direction encodes "better": counts (Gaps, Scaff, Cont) run
# max->min so smaller is darker-best; size/quality columns run min->max.
format_table(selectionOfStats_internalComparisonHeatmap,
  align =c("l","c","c","c","c", "c", "c", "c", "c"),
  list(Bases_Mb = color_tile(customGray_min, customGray_max),
       `Het_%` = color_tile(customGray_min, customGray_max),
       `GC_%` = color_tile(customGray_min, customGray_max),
       Gaps_per_Gb = color_tile(customBlue_max, customBlue_min),
       Scaff = color_tile(customBlue_max,customBlue_min),
       Cont = color_tile(customBlue_max,customBlue_min),
       Longest_Scaff_Mb = color_tile(customBlue_min, customBlue_max),
       Scaff_NG50_Mb = color_tile(customBlue_min, customBlue_max),
       Scaff_NG95_Mb = color_tile(customBlue_min, customBlue_max),
       Longest_Cont_Mb = color_tile(customBlue_min, customBlue_max),
       Cont_NG50_Mb = color_tile(customBlue_min, customBlue_max),
       Cont_NG95_Mb = color_tile(customBlue_min, customBlue_max),
       QV = color_tile(customBlue_min, customBlue_max),
       Completeness = color_tile(customBlue_min, customBlue_max),
       `Comp_BUSCOs_%` = color_tile(customBlue_min, customBlue_max),
       `Comp_Single_BUSCOs_%` = color_tile(customBlue_min, customBlue_max)))
cat('<br>')
cat("\n")
cat('<br>')
# Legend table: one 'Max' row and one 'Min' row per column.
legendTable<-read.table(file = snakemake@params[[1]], sep = "", header = TRUE, row.names=NULL, check.names = FALSE)
format_table(legendTable,
  align =c("c","c","c", "c", "c", "c", "c","c","c","c", "c", "c", "c", "c","c", "c"),
  list(Bases_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Bases_Mb == 'Max',customGray_max, customGray_min))),
       `Het_%` = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(`Het_%` == 'Max',customGray_max, customGray_min))),
       `GC_%` = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(`GC_%` == 'Max',customGray_max, customGray_min))),
       Gaps_per_Gb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Gaps_per_Gb == 'Min',customBlue_max, customBlue_min))),
       Scaff = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Scaff == 'Min',customBlue_max, customBlue_min))),
       Cont = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Cont == 'Min',customBlue_max, customBlue_min))),
       Longest_Scaff_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Longest_Scaff_Mb == 'Max',customBlue_max, customBlue_min))),
       Scaff_NG50_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Scaff_NG50_Mb == 'Max',customBlue_max, customBlue_min))),
       Scaff_NG95_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Scaff_NG95_Mb == 'Max',customBlue_max, customBlue_min))),
       Longest_Cont_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Longest_Cont_Mb == 'Max',customBlue_max, customBlue_min))),
       Cont_NG50_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Cont_NG50_Mb == 'Max',customBlue_max, customBlue_min))),
       Cont_NG95_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Cont_NG95_Mb == 'Max',customBlue_max, customBlue_min))),
       QV = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(QV == 'Max',customBlue_max, customBlue_min))),
       Completeness = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Completeness == 'Max',customBlue_max, customBlue_min))),
       `Comp_BUSCOs_%` = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(`Comp_BUSCOs_%` == 'Max',customBlue_max, customBlue_min))),
       `Comp_Single_BUSCOs_%` = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(`Comp_Single_BUSCOs_%` == 'Max',customBlue_max, customBlue_min)))))
sink(file = NULL)
Bases_Mb Het_% GC_% Gaps_per_Gb Scaff Cont Longest_Scaff_Mb Scaff_NG50_Mb Scaff_NG95_Mb Longest_Cont_Mb Cont_NG50_Mb Cont_NG95_Mb QV Completeness Comp_BUSCOs_% Comp_Single_BUSCOs_%
'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max'
'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min'
#!/usr/bin/perl
# Filter a SAM stream (STDIN -> STDOUT): for each read id, keep only the
# alignment whose 5' end is aligned; when no single such alignment can be
# chosen, re-emit the first record for the read with the unmapped flag
# (0x4) set instead.
use strict;
use warnings;
my $prev_id = "";
my @five;    # records whose 5' end of the read is aligned
my @three;   # records whose 3' end of the read is aligned (collected, unused here)
my @unmap;   # unmapped records (collected, unused here)
my @mid;     # records clipped on both ends (collected, unused here)
my @all;     # every record seen for the current read id, in input order
my $counter = 0;
while (<STDIN>){
    chomp;
    # Pass SAM header lines straight through.
    if (/^@/){
        print $_."\n";
        next;
    }
    my ($id, $flag, $chr_from, $loc_from, $mapq, $cigar, $d1, $d2, $d3, $read, $read_qual, @rest) = split /\t/;
    # Reverse the 32-bit binary string so index i holds FLAG bit 2^i
    # ($binary[2] = 0x4 unmapped, $binary[4] = 0x10 reverse strand, per SAM spec).
    my $bin = reverse(dec2bin($flag));
    my @binary = split(//,$bin);
    if ($prev_id ne $id && $prev_id ne ""){
        # New read id: flush the decision for the previous read id.
        if ($counter == 1){
            if (@five == 1){
                print $five[0]."\n";
            }
            else{
                # Single record but not a clean 5' alignment: emit it with
                # the unmapped bit (0x4) forced on.
                my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split /\t/, $all[0];
                my $bin_1 = reverse(dec2bin($flag_1));
                my @binary_1 = split(//,$bin_1);
                $binary_1[2] = 1;
                my $bin_1_new = reverse(join("",@binary_1));
                my $flag_1_new = bin2dec($bin_1_new);
                print(join("\t",$id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1)."\n");
            }
        }
        elsif ($counter == 2 && @five == 1){
            # Two records with exactly one clean 5' alignment: keep it.
            print $five[0]."\n";
        }
        else{
            # Ambiguous: mark the first record as unmapped and emit it.
            my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split /\t/, $all[0];
            my $bin_1 = reverse(dec2bin($flag_1));
            my @binary_1 = split(//,$bin_1);
            $binary_1[2] = 1;
            my $bin_1_new = reverse(join("",@binary_1));
            my $flag_1_new = bin2dec($bin_1_new);
            print(join("\t",$id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1)."\n");
        }
        # Reset per-read accumulators for the new read id.
        $counter = 0;
        undef @unmap;
        undef @five;
        undef @three;
        undef @mid;
        undef @all;
    }
    $counter++;
    $prev_id = $id;
    push @all,$_;
    # Classify this record by which end of the read is aligned:
    # forward strand with leading match (or reverse with trailing match)
    # means the 5' end of the read is aligned.
    if ($binary[2]==1){
        push @unmap,$_;
    }
    elsif ($binary[4]==0 && $cigar =~ m/^[0-9]*M/ || $binary[4]==1 && $cigar =~ m/.*M$/){
        push @five, $_;
    }
    elsif ($binary[4]==1 && $cigar =~ m/^[0-9]*M/ || $binary[4]==0 && $cigar =~ m/.*M$/){
        push @three, $_;
    }
    elsif ($cigar =~ m/^[0-9]*[HS].*M.*[HS]$/){
        push @mid, $_;
    }
}
# End of input: flush the final read id (same decision logic as above).
if ($counter == 1){
    if (@five == 1){
        print $five[0]."\n";
    }
    else{
        my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split /\t/, $all[0];
        my $bin_1 = reverse(dec2bin($flag_1));
        my @binary_1 = split(//,$bin_1);
        $binary_1[2] = 1;
        my $bin_1_new = reverse(join("",@binary_1));
        my $flag_1_new = bin2dec($bin_1_new);
        print(join("\t",$id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1)."\n");
    }
}
elsif ($counter == 2 && @five == 1){
    print $five[0]."\n";
}
else{
    my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split /\t/, $all[0];
    my $bin_1 = reverse(dec2bin($flag_1));
    my @binary_1 = split(//,$bin_1);
    $binary_1[2] = 1;
    my $bin_1_new = reverse(join("",@binary_1));
    my $flag_1_new = bin2dec($bin_1_new);
    print(join("\t",$id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1)."\n");
}
# Convert a non-negative decimal SAM flag to its 32-character binary string.
sub dec2bin {
    return sprintf("%032b", shift);
}

# Convert a binary string (up to 32 bits) back to its decimal value.
sub bin2dec {
    my $bits = substr("0" x 32 . shift, -32);
    return oct("0b" . $bits);
}
#!/usr/bin/perl
# Combine two single-end BAMs (read 1 and read 2 of Hi-C style pairs,
# streamed through `samtools view -h`) into properly flagged paired-end
# SAM records on STDOUT.  Pairs where either mate is unmapped or has
# MAPQ < 10 are dropped.
use strict;
MAIN : {
    my ($read1_bam, $read2_bam) = @ARGV;
    if ((not defined $read1_bam) ||
        (not defined $read2_bam)) {
        die ("Usage: ./two_read_bam_combiner.pl <read 1 bam> <read 2 bam>\n");
    }
    open(FILE1,"samtools view -h $read1_bam |");
    open(FILE2,"samtools view -h $read2_bam |");
    my $line1 = <FILE1>;
    my $line2 = <FILE2>;
    my $counter = 0;
    my $new_counter = 0;
    while (defined $line1) {
        ## the line directly below this needed to be modified slightly from the genotyping pipeline ##
        # Header: @SQ lines must match between the two inputs; print once.
        if ($line1 =~ /^(\@)SQ/){
            if ($line1 ne $line2){print $line1;print $line2; die ("inconsistent bam header.");}
            else{
                print $line1;
            }
            $line1 = <FILE1>;
            $line2 = <FILE2>;
            next;
        }
        $counter++;
        # Progress marker on STDERR every million record pairs.
        if ($counter == ($new_counter + 1000000)) {
            print STDERR $counter . "\n";
            $new_counter = $counter;
        }
        chomp $line1;
        chomp $line2;
        my ($id1, $flag1, $chr_from1, $loc_from1, $mapq1, $cigar1, $d1_1, $d2_1, $d3_1, $read1, $read_qual1, @rest1) = split(/\t/,$line1);
        my ($id2, $flag2, $chr_from2, $loc_from2, $mapq2, $cigar2, $d1_2, $d2_2, $d3_2, $read2, $read_qual2, @rest2) = split(/\t/,$line2);
        # The two files must be name-sorted identically.
        if ($id1 ne $id2) {
            die ("The id's of the two files do not match up at line number $counter\n");
        }
        # Reversed binary strings: index i holds FLAG bit 2^i
        # (index 2 = 0x4 unmapped, index 4 = 0x10 reverse strand).
        my $bin1 = reverse(dec2bin($flag1));
        my $bin2 = reverse(dec2bin($flag2));
        my @binary1 = split(//,$bin1);
        my @binary2 = split(//,$bin2);
        # A pair is discarded when either mate is unmapped or low quality.
        my $trouble = 0;
        if (($binary1[2] == 1) || ($mapq1 < 10)) {
            $trouble = 1;
        }
        if (($binary2[2]== 1) || ($mapq2 < 10)) {
            $trouble = 1;
        }
        my $proper_pair1;
        my $proper_pair2;
        my $dist1;
        my $dist2;
        # Both mates mapped: mark proper pair and compute signed template
        # distance (TLEN); 0 when the mates land on different references.
        if (($binary1[2] == 0) && ($binary2[2] == 0)) {
            $proper_pair1 = 1;
            $proper_pair2 = 1;
            if ($chr_from1 eq $chr_from2) {
                my $dist = abs($loc_from1 - $loc_from2);
                if ($loc_from1 >= $loc_from2) {
                    $dist1 = -1*$dist;
                    $dist2 = $dist;
                } else {
                    $dist1 = $dist;
                    $dist2 = -1*$dist;
                }
            } else {
                $dist1 = 0;
                $dist2 = 0;
            }
        } else {
            $proper_pair1 = 0;
            $proper_pair2 = 0;
            $dist1 = 0;
            $dist2 = 0;
        }
        # Rebuild paired-end FLAG strings bit by bit (MSB first): keep the
        # duplicate/QC bits, set paired + first/second-in-pair, and copy
        # each mate's strand/unmapped state into the partner's mate bits.
        my $new_bin1 = join("","000000000000000000000",$binary1[10],$binary1[9],"0","0","1",$binary2[4],$binary1[4],$binary2[2],$binary1[2],$proper_pair1,"1");
        my $new_bin2 = join("","000000000000000000000",$binary2[10],$binary2[9],"0","1","0",$binary1[4],$binary2[4],$binary1[2],$binary2[2],$proper_pair2,"1");
        my $new_flag1 = bin2dec($new_bin1);
        my $new_flag2 = bin2dec($new_bin2);
        unless ($trouble == 1) {
            print(join("\t",$id1,$new_flag1,$chr_from1,$loc_from1,$mapq1,$cigar1,$chr_from2,$loc_from2,$dist1,$read1,$read_qual1,@rest1) . "\n");
            print(join("\t",$id2,$new_flag2,$chr_from2,$loc_from2,$mapq2,$cigar2,$chr_from1,$loc_from1,$dist2,$read2,$read_qual2,@rest2) . "\n");
        }
        $line1 = <FILE1>;
        $line2 = <FILE2>;
    }
}
# Decimal SAM flag -> 32-character binary string.
sub dec2bin {
    return sprintf("%032b", shift);
}

# Binary string (up to 32 bits) -> decimal value.
sub bin2dec {
    my $bits = substr("0" x 32 . shift, -32);
    return oct("0b" . $bits);
}
import pandas as pd
from tabulate import tabulate

# Build the combined per-assembly results table: read the aggregated stats
# (one assembly per column), transpose to one assembly per row, convert the
# base-pair columns to megabases, strip '%' signs, round, and write the table
# both as pandoc-markdown (landscape page) and as plain text.
results = pd.read_csv(snakemake.input.results, dtype=str, index_col=0, delim_whitespace=True, skip_blank_lines=True)
# samples=samples.reset_index

# Columns converted from bp to Mb (values arrive as strings).
turn2FloatAndMb = ['Bases_Mb', 'Est_Size_Mb', 'Longest_Scaff_Mb', 'Scaff_N50_Mb', 'Scaff_NG50_Mb', 'Scaff_N95_Mb', 'Scaff_NG95_Mb', 'Longest_Cont_Mb', 'Cont_N50_Mb', 'Cont_NG50_Mb', 'Cont_N95_Mb', 'Cont_NG95_Mb']
# Columns rounded to two decimals; the first four carry a '%' suffix that is
# removed before the float conversion.
roundDecimals = ['Comp_BUSCOs_%', 'Comp_Single_BUSCOs_%', 'Het_%', 'GC_%', 'QV', 'Completeness', 'Bases_Mb', 'Est_Size_Mb', 'Longest_Scaff_Mb', 'Scaff_N50_Mb', 'Scaff_NG50_Mb', 'Scaff_N95_Mb', 'Scaff_NG95_Mb', 'Longest_Cont_Mb', 'Cont_N50_Mb', 'Cont_NG50_Mb', 'Cont_N95_Mb', 'Cont_NG95_Mb']

print('this is samples', results)
transposed = results.T
print('this is sampleTransposed', transposed)

for column in turn2FloatAndMb:
    transposed[column] = transposed[column].astype(float) / 1000000

for column in roundDecimals[:4]:
    transposed[column] = transposed[column].str.replace('%', '')

for column in roundDecimals:
    transposed[column] = transposed[column].astype(float).round(2)

table = transposed.rename_axis('ASM_ID')

# Markdown output, wrapped in the \blandscape/\elandscape LaTeX macros that
# the report's pandoc header defines.
with open(snakemake.output[0], "w") as out_markdown:
    markdown_lines = [
        "",
        "\\blandscape",
        "",
        "\\tiny",
        tabulate(table, headers='keys', tablefmt="pipe", showindex=True),
        "\\elandscape",
        "",
    ]
    out_markdown.write("\n".join(markdown_lines) + "\n")

# Plain-text rendering of the same table.
with open(snakemake.output[1], "w") as out_plain:
    out_plain.write(tabulate(table, headers='keys', tablefmt="plain", showindex=True) + "\n")
import os
import numpy as np
import pandas as pd
from tabulate import tabulate

# ---------------------------------------------------------------------------
# Collect the inputs for the "GEP Quick-View" report. All tables are read
# with dtype=str because they are only reformatted via tabulate, never
# computed on.
# ---------------------------------------------------------------------------

# Aggregated key statistics; the final four rows are dropped.
key_stats_table = pd.read_csv(snakemake.input[0], dtype=str, delim_whitespace=True, skip_blank_lines=True).iloc[:-4, :]

# Genomescope2 profile images (linear- and log-scale plots).
genomescope_linear = snakemake.input[1]
genomescope_log = snakemake.input[2]

# Merqury QV score table; the trailing 'Error' column is not displayed.
QV_score_table = (
    pd.read_csv(snakemake.input[3], dtype=str, delim_whitespace=True, skip_blank_lines=True,
                names=["assembly_name", "Assembly_Only", "Total_Kmers", "QV_Score", "Error"])
    .drop(['Error'], axis=1)
    .set_index(['assembly_name'])
)

# Merqury k-mer completeness table; the 'all' column is dropped.
kmer_completeness_table = (
    pd.read_csv(snakemake.input[4], dtype=str, delim_whitespace=True, skip_blank_lines=True,
                names=["assembly_name", "all", "Kmers_Assembly", "Kmers_Reads", "%"])
    .drop(["all"], axis=1)
    .set_index(['assembly_name'])
)

# BUSCO score summary: keep each line verbatim minus trailing whitespace.
with open(snakemake.input[5]) as busco_scores:
    lines = [raw.rstrip() for raw in busco_scores]

# Merqury copy-number spectra plots (.fl = flat, .st = stacked variants).
kmer_flat = snakemake.input[6]
kmer_stacked = snakemake.input[7]

# Rule parameters: assembly display name, meryl k-mer size, BUSCO lineage db.
params_assemblyID = snakemake.params[0]
params_merylKmer = snakemake.params[1]
params_buscoDB = snakemake.params[2]
# Emit the pandoc-markdown source of the two-column "GEP Quick-View" report.
# NOTE: multi-argument print() inserts a single space between arguments, so
# image lines come out as "![]( path ){ width=37% }" — the spaces inside the
# parentheses are produced by the default print separator.
with open(snakemake.output[0], 'w') as outFile:
    # print("---", "title: 'GEP Quick-View'", "author: James Sullivan", "date: 11/09/2021", "geometry: margin=2cm", "classoption: table", "documentclass: extarticle", "urlcolor: blue", "colorlinks: true", "header-includes: |", " \\rowcolors{2}{gray!10}{gray!25}" , " ```{=latex}", "---", sep="\n")
    print("", file=outFile)
    # Layout directives consumed by LaTeX via the report's pandoc header.
    print("\\twocolumn", file=outFile)
    print("\\Large", file=outFile)
    print("# Assembly:",params_assemblyID, file=outFile)
    print("", file=outFile)
    print(" - db built with kmer size ", params_merylKmer, "bp", file=outFile)
    print("", file=outFile)
    # Key genome statistics table (from the aggregated results TSV).
    print("### Genome Statistics", file=outFile)
    print("", file=outFile)
    print(tabulate(key_stats_table, headers='keys',tablefmt="pipe", showindex=False), file=outFile)
    print("", file=outFile)
    # Genomescope2 profile plots, linear and log scale.
    print("### Genomescope2 Profile (Linear)", file=outFile)
    print("![](", genomescope_linear, "){ width=37% }", file=outFile)
    print("", file=outFile)
    print("### Genomescope2 Profile (Log)", file=outFile)
    print("![](", genomescope_log, "){ width=37% }", file=outFile)
    # A lone "\" is a pandoc/LaTeX line break; \pagebreak starts column 2.
    print("\\", file=outFile)
    print("\\pagebreak", file=outFile)
    print("\\", file=outFile)
    print("", file=outFile)
    print("\\large", file=outFile)
    # Merqury QV score table.
    print("### QV Score", file=outFile)
    print(tabulate(QV_score_table, headers='keys',tablefmt="pipe", showindex=True), file=outFile)
    print("", file=outFile)
    print("\\large", file=outFile)
    # Merqury k-mer completeness table.
    print("### Kmer Completeness", file=outFile)
    print(tabulate(kmer_completeness_table, headers='keys',tablefmt="pipe", showindex=True), file=outFile)
    print("", file=outFile)
    print("\\Large", file=outFile)
    # BUSCO score block, wrapped in a fenced code block to keep alignment.
    print("### BUSCOv5 (database: ", params_buscoDB, ")", file=outFile)
    print("```", file=outFile)
    for line in lines:
        print(line, file=outFile)
    print("```", file=outFile)
    print("\\", file=outFile)
    print("\\large", file=outFile)
    print("", file=outFile)
    # NOTE(review): this header says "(Stacked)" but embeds kmer_flat (the
    # .fl "flat" spectra plot) — the label likely should read "(Flat)";
    # confirm against the rendered report before changing the string.
    print("### K-mer Multiplicity PRI Only (Stacked)", file=outFile)
    print("![](", kmer_flat, "){ width=44% }", file=outFile)
    print("\\", file=outFile)
    print("", file=outFile)
    print("### K-mer Multiplicity PRI + ALT (Stacked)", file=outFile)
    print("![](", kmer_stacked, "){ width=44% }", file=outFile)
    print("\\", file=outFile)
    print("\\pagebreak", file=outFile)
# ---
# title: "GEP Quick-View"
# date: 11/09/2021
# geometry: a4paper
# classoption: table
# documentclass: extarticle
# urlcolor: blue
# #pdf_document: null
# colorlinks: true
# header-includes: |
#   \rowcolors{2}{gray!10}{gray!25}
#   ```{=latex}
#   \usepackage{pdflscape}
#   \usepackage{longtable}
#   \newcommand{\blandscape}{\begin{landscape}}
#   \newcommand{\elandscape}{\end{landscape}}
#   ```
# output: pdf_document
# ---
#
# print("---", "title: 'GEP Quick-View'", "author: James Sullivan", "date: 11/09/2021", "geometry: margin=2cm", "classoption: table", "documentclass: extarticle", "urlcolor: blue", "colorlinks: true", "header-includes: |", " \\rowcolors{2}{gray!10}{gray!25}" , " ```{=latex}", "" sep="\n")
import shutil

# Copy the Pretext HiC heatmap into the key-results folder.
shutil.copyfile(snakemake.input.PretextMap, snakemake.output.pretextCP2keyResults)

# Emit a small markdown section embedding the heatmap image. The double space
# after the assembly name and the spaces inside "![]( ... )" reproduce the
# original multi-argument print() output byte-for-byte.
heatmap = snakemake.input.PretextMap
md_lines = [
    "",
    f"### {snakemake.params.assemblyName}  HiC Heatmap",
    f"![]( {heatmap} ){{ height=30% }}",
]
with open(snakemake.output.IndividualPretextMD, "w") as md:
    md.write("\n".join(md_lines) + "\n")
---
geometry: b3paper, margin=1.3cm
classoption:
- table
- twocolumn
urlcolor: blue
colorlinks: true
header-includes:
- |
```{=latex}
\rowcolors{2}{gray!10}{gray!25}
\usepackage{longtable}
\usepackage{supertabular}
\let\longtable\supertabular
\let\endlongtable\endsupertabular
\let\endhead\relax
```
output: pdf_document
---
<style>
.main-container { width: 1200px; max-width:2800px;}
</style>
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment