Skip to content
Snippets Groups Projects
Commit f3db9abb authored by james94's avatar james94
Browse files

folders

parent 4780d3e5
No related branches found
No related tags found
No related merge requests found
Showing
with 1057 additions and 10 deletions
# Snakemake profile defaults: each key mirrors the equivalent CLI flag.
cores: 64
dry-run: False
use-conda: True
# Seconds to wait for output files to appear on shared filesystems.
latency-wait: 60
rerun-incomplete: True
printshellcmds: False
keep-going: False
# Optional overrides, disabled by default:
#shadow-prefix: /srv/public/user/<user>/tempfileshere
#use-singularity: True
import numpy as np
from itertools import groupby
import json
import sys
import csv
import os
import pandas as pd
from tabulate import tabulate
#pd.set_option('display.float_format', lambda x: '%,g' % x)

# CLI layout (positional): argv[1] = assembly FASTA, argv[2] = file whose
# first line is the Genomescope2 genome-size estimate, argv[3] = assembly
# label, argv[4] = user-supplied estimated genome size (0 means "use the
# Genomescope2 value"), argv[5]/argv[6] = scaffold/contig report paths.
with open(sys.argv[2]) as f:
    ff = f.readlines()
# estSizeFile = open()
#
# size_bp = estSizeFile.readlines()
estSize = int(float(sys.argv[4]))
if estSize == 0:
    # Strip the trailing newline; size_bp stays a string in this branch
    # (it is int()-converted later in calculate_stats).
    size_bp = ff[0][:-1]
    print("No estimated Genome Size provided, using estimation from Genomescope2: ", size_bp)
else:
    print("Estimated Genome Size provided as:", estSize, " using this size for NG and LG stats")
    size_bp = estSize
def fasta_iter(fasta_file):
    """
    Takes a FASTA file, and produces a generator of Header and Sequences.
    This is a memory-efficient way of analyzing a FASTA file -- without
    reading the entire file into memory.

    Parameters
    ----------
    fasta_file : str
        The file location of the FASTA file

    Yields
    ------
    header: str
        The string contained in the header portion of the sequence record
        (everything after the '>')
    seq: str
        The sequence portion of the sequence record, upper-cased and with
        line breaks removed
    """
    # Context manager fixes the original's leaked file handle: the file is
    # now closed when the generator is exhausted or garbage-collected.
    with open(fasta_file) as fh:
        # groupby alternates between runs of header lines and runs of
        # sequence lines, so consuming the iterator pairwise walks records.
        fa_iter = (x[1] for x in groupby(fh, lambda line: line[0] == ">"))
        for header in fa_iter:
            # drop the ">"
            header = next(header)[1:].strip()
            # join all sequence lines to one.
            seq = "".join(s.upper().strip() for s in next(fa_iter))
            yield header, seq
def read_genome(fasta_file):
    """
    Takes a FASTA file, and produces 2 lists of sequence lengths. It also
    calculates the GC Content, since this is the only statistic that is not
    calculated based on sequence lengths.

    Parameters
    ----------
    fasta_file : str
        The file location of the FASTA file

    Returns
    -------
    contig_lens: list
        A list of lengths of all contigs in the genome.
    scaffold_lens: list
        A list of lengths of all scaffolds in the genome.
    gc_cont: float
        The percentage of total basepairs in the genome that are either G
        or C. Returns 0.0 for an empty/all-gap genome instead of raising
        ZeroDivisionError (the original crashed on that input).
    """
    gc = 0
    total_len = 0
    contig_lens = []
    scaffold_lens = []
    for _, seq in fasta_iter(fasta_file):
        scaffold_lens.append(len(seq))
        # Scaffolds are split into contigs at gap runs; the heuristic used
        # here treats any literal "NN" substring as a gap separator.
        if "NN" in seq:
            contig_list = seq.split("NN")
        else:
            contig_list = [seq]
        for contig in contig_list:
            if len(contig):
                gc += contig.count('G') + contig.count('C')
                total_len += len(contig)
                contig_lens.append(len(contig))
    # Guard against division by zero when no non-gap sequence was seen.
    gc_cont = (gc / total_len) * 100 if total_len else 0.0
    return contig_lens, scaffold_lens, gc_cont
# Percent thresholds at which the Nxx/NGxx/Lxx/LGxx series are reported.
LEVELS = (5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60,
          65, 70, 75, 80, 85, 90, 95, 96, 97, 98, 99, 100)

def calculate_stats(seq_lens, gc_cont, est_size=None):
    """
    Compute assembly summary statistics from a list of sequence lengths.

    Parameters
    ----------
    seq_lens : list of int
        Lengths of all sequences (contigs or scaffolds); must be non-empty.
    gc_cont : float
        GC-content percentage, stored verbatim in the result.
    est_size : int or str, optional
        Estimated genome size in bp used for the NG/LG series. Defaults to
        the module-level ``size_bp`` parsed from the command line, so the
        existing two-argument callers keep their behaviour.

    Returns
    -------
    dict
        Insertion-ordered stats: 7 headline entries, then 24 Lxx, 24 Nxx,
        24 LGxx and 24 NGxx entries. Downstream report code slices this
        dict by position, so the insertion order must not change.

    Notes
    -----
    Replaces the original ``int(np.where(csum == csumn)[0])`` lookups,
    which raise/deprecate when the cumulative sum contains duplicate
    values (possible with zero-length entries), with ``np.argmax`` over
    the boolean mask — the first index reaching the threshold.
    """
    stats = {}
    seq_array = np.array(seq_lens)
    stats['sequence_count'] = seq_array.size
    stats['number_of_gaps'] = 0  # overwritten later by the caller
    stats['gc_content (%)'] = gc_cont
    sorted_lens = seq_array[np.argsort(-seq_array)]  # descending lengths
    stats['longest (bp)'] = int(sorted_lens[0])
    stats['shortest (bp)'] = int(sorted_lens[-1])
    stats['total_bps (bp)'] = int(np.sum(sorted_lens))
    stats['estimated_size (bp)'] = int(size_bp) if est_size is None else int(est_size)
    csum = np.cumsum(sorted_lens)
    last = seq_array.size - 1

    def first_reaching(threshold):
        # Index of the first cumulative sum >= threshold; falls back to the
        # final sequence when the assembly never reaches the threshold
        # (possible for NG/LG when the estimate exceeds the assembly size).
        reached = csum >= threshold
        return int(np.argmax(reached)) if reached.any() else last

    # Lxx: how many sequences are needed to cover xx% of the assembly.
    for level in LEVELS:
        nx = int(stats['total_bps (bp)'] * (level / 100))
        stats['L' + str(level)] = first_reaching(nx) + 1
    # Nxx: length of the sequence at which xx% of the assembly is covered.
    for level in LEVELS:
        nx = int(stats['total_bps (bp)'] * (level / 100))
        stats['N' + str(level)] = int(sorted_lens[first_reaching(nx)])
    # LGxx / NGxx: as above, but relative to the estimated genome size.
    for level in LEVELS:
        ngx = int(stats['estimated_size (bp)'] * (level / 100))
        stats['LG' + str(level)] = first_reaching(ngx) + 1
    for level in LEVELS:
        ngx = int(stats['estimated_size (bp)'] * (level / 100))
        stats['NG' + str(level)] = int(sorted_lens[first_reaching(ngx)])
    return stats
if __name__ == "__main__":
    # Build formatted scaffold- and contig-level report tables and write
    # them to the two output paths given on the command line.
    # print(size_bp)
    # print(type(size_bp))
    naming = sys.argv[3]
    infilename = sys.argv[1]
    contig_lens, scaffold_lens, gc_cont = read_genome(infilename)
    # contig_stats = calculate_stats(contig_lens, gc_cont)
    scaffold_stats = calculate_stats(scaffold_lens, gc_cont)
    rounded_gc = round(gc_cont, 2)
    # print("this is the rounded_gc: ", rounded_gc)
    contig_stats = calculate_stats(contig_lens, gc_cont)
    # Splitting scaffolds at gaps produces one extra contig per gap, so the
    # difference in sequence counts equals the number of gaps.
    gaps=contig_stats.get('sequence_count') - scaffold_stats.get('sequence_count')
    scaffold_stats['number_of_gaps'] = gaps
    contig_stats['number_of_gaps'] = gaps
    # print("this is the scaffold_stats: ", scaffold_stats)
    # print(scaffold_stats)
    # df_scaffold_all= pd.DataFrame.from_dict(scaffold_stats, orient= 'index')
    # print(df_scaffold_all)
    # df2 = pd.DataFrame([scaffold_stats]).T
    # print(df2)
    # s = pd.Series(scaffold_stats, name=naming)
    # s.index.name = 'Assembly:'
    # s.reset_index()
    # print(s)
    # NOTE: the stats dict is insertion-ordered — rows 0-6 are the headline
    # values, 7-30 Lxx, 31-54 Nxx, 55-78 LGxx, 79-102 NGxx.  Every iloc
    # slice below depends on that layout.
    scaff_seq=pd.DataFrame(scaffold_stats.items(), columns=['Assembly:', naming])
    # print("this is the scaff_seq: ", scaff_seq)
    df_scaffold_top=scaff_seq.iloc[0:7,0:2]
    # print("this is GC CONTENT", gc_cont)
    df_scaffold_top[naming]=df_scaffold_top[naming].astype('int64').apply('{:,}'.format)
    # print("this is the top: ", df_scaffold_top)
    # Row 2 is the GC percentage — restore the rounded float after the
    # int64 comma-formatting above truncated it.
    df_scaffold_top[naming][2] = rounded_gc
    # print("this is df_scaffold_top[naming][2]", df_scaffold_top[naming][2])
    # print("this is the top: ", df_scaffold_top)
    # df_scaffold_top=df_scaffold_top.style.hide_index()
    # df_scaffold_top[naming].round(decimals=0)
    # df_contig_all=pd.DataFrame(data=contig_stats)
    # df_contig_top=df_contig_all.iloc[0:6,0:2]
    df_scaffold_Nxx=pd.DataFrame(scaffold_stats.items(), columns=['Nxx Level (%)', 'Length of Nxx Scaffold (bp)'])
    df_scaffold_Nxx=df_scaffold_Nxx.iloc[31:55,0:2]
    df_scaffold_Nxx=df_scaffold_Nxx.reset_index()
    # print(df_scaffold_Nxx)
    df_scaffold_NGxx=pd.DataFrame(scaffold_stats.items(), columns=['NGxx Level (%)', 'Length of NGxx Scaffold (bp)'])
    df_scaffold_NGxx=df_scaffold_NGxx.iloc[79:104,0:2]
    df_scaffold_NGxx=df_scaffold_NGxx.reset_index()
    # print(df_scaffold_NGxx)
    # Nxx and NGxx side by side; drop the two helper index columns.
    df_scaffold_N_NG=pd.concat([df_scaffold_Nxx,df_scaffold_NGxx], axis=1)
    df_scaffold_N_NG=df_scaffold_N_NG.drop(df_scaffold_N_NG.columns[[0,3]], axis = 1)
    df_scaffold_N_NG['Length of Nxx Scaffold (bp)']=df_scaffold_N_NG['Length of Nxx Scaffold (bp)'].astype('int64').apply('{:,}'.format)
    df_scaffold_N_NG['Length of NGxx Scaffold (bp)']=df_scaffold_N_NG['Length of NGxx Scaffold (bp)'].astype('int64').apply('{:,}'.format)
    # df_scaffold_N_NG=df_scaffold_N_NG.style.hide_index()
    df_scaffold_Lxx=pd.DataFrame(scaffold_stats.items(), columns=['Lxx Level (%)', 'Count of scaffolds (for Lxx Level)'])
    df_scaffold_Lxx=df_scaffold_Lxx.iloc[7:31,0:2]
    df_scaffold_Lxx=df_scaffold_Lxx.reset_index()
    # print(df_scaffold_Nxx)
    df_scaffold_LGxx=pd.DataFrame(scaffold_stats.items(), columns=['LGxx Level (%)', 'Count of scaffolds (for LGxx Level)'])
    df_scaffold_LGxx=df_scaffold_LGxx.iloc[55:79,0:2]
    df_scaffold_LGxx=df_scaffold_LGxx.reset_index()
    # print(df_scaffold_NGxx)
    df_scaffold_L_LG=pd.concat([df_scaffold_Lxx,df_scaffold_LGxx], axis=1)
    df_scaffold_L_LG=df_scaffold_L_LG.drop(df_scaffold_L_LG.columns[[0,3]], axis = 1)
    df_scaffold_L_LG['Count of scaffolds (for Lxx Level)']=df_scaffold_L_LG['Count of scaffolds (for Lxx Level)'].astype('int64').apply('{:,}'.format)
    df_scaffold_L_LG['Count of scaffolds (for LGxx Level)']=df_scaffold_L_LG['Count of scaffolds (for LGxx Level)'].astype('int64').apply('{:,}'.format)
    ################################################################################################################
    # Same table construction as above, but for the contig-level stats.
    contig_seq=pd.DataFrame(contig_stats.items(), columns=['Assembly:', naming])
    df_contig_top=contig_seq.iloc[0:7,0:2]
    df_contig_top[naming]=df_contig_top[naming].astype('int64').apply('{:,}'.format)
    df_contig_top[naming][2] = rounded_gc
    # df_scaffold_top=df_scaffold_top.style.hide_index()
    # df_scaffold_top[naming].round(decimals=0)
    # df_contig_all=pd.DataFrame(data=contig_stats)
    # df_contig_top=df_contig_all.iloc[0:6,0:2]
    df_contig_Nxx=pd.DataFrame(contig_stats.items(), columns=['Nxx Level (%)', 'Length of Nxx contig (bp)'])
    df_contig_Nxx=df_contig_Nxx.iloc[31:55,0:2]
    df_contig_Nxx=df_contig_Nxx.reset_index()
    # print(df_scaffold_Nxx)
    df_contig_NGxx=pd.DataFrame(contig_stats.items(), columns=['NGxx Level (%)', 'Length of NGxx contig (bp)'])
    df_contig_NGxx=df_contig_NGxx.iloc[79:104,0:2]
    df_contig_NGxx=df_contig_NGxx.reset_index()
    # print(df_scaffold_NGxx)
    df_contig_N_NG=pd.concat([df_contig_Nxx,df_contig_NGxx], axis=1)
    df_contig_N_NG=df_contig_N_NG.drop(df_contig_N_NG.columns[[0,3]], axis = 1)
    df_contig_N_NG['Length of Nxx contig (bp)']=df_contig_N_NG['Length of Nxx contig (bp)'].astype('int64').apply('{:,}'.format)
    df_contig_N_NG['Length of NGxx contig (bp)']=df_contig_N_NG['Length of NGxx contig (bp)'].astype('int64').apply('{:,}'.format)
    # df_scaffold_N_NG=df_scaffold_N_NG.style.hide_index()
    df_contig_Lxx=pd.DataFrame(contig_stats.items(), columns=['Lxx Level (%)', 'Count of contig (for Lxx Level)'])
    df_contig_Lxx=df_contig_Lxx.iloc[7:31,0:2]
    df_contig_Lxx=df_contig_Lxx.reset_index()
    # print(df_scaffold_Nxx)
    df_contig_LGxx=pd.DataFrame(contig_stats.items(), columns=['LGxx Level (%)', 'Count of contig (for LGxx Level)'])
    df_contig_LGxx=df_contig_LGxx.iloc[55:79,0:2]
    df_contig_LGxx=df_contig_LGxx.reset_index()
    # print(df_scaffold_NGxx)
    df_contig_L_LG=pd.concat([df_contig_Lxx,df_contig_LGxx], axis=1)
    df_contig_L_LG=df_contig_L_LG.drop(df_contig_L_LG.columns[[0,3]], axis = 1)
    df_contig_L_LG['Count of contig (for Lxx Level)']=df_contig_L_LG['Count of contig (for Lxx Level)'].astype('int64').apply('{:,}'.format)
    df_contig_L_LG['Count of contig (for LGxx Level)']=df_contig_L_LG['Count of contig (for LGxx Level)'].astype('int64').apply('{:,}'.format)
    # df_scaffold_L_LG=df_scaffold_L_LG.style.hide_index()
    # print(df_contig_top)
    # print(scaffold_stats)
    # stat_output = {'Contig Stats': contig_stats,
    # 'Scaffold Stats': scaffold_stats}
    # print(json.dumps(stat_output, indent=2, sort_keys=False))
    # scaffold_out = {scaffold_stats}
    # contig_out = {contig_stats}
    # print(json.dumps(scaffold_stats, indent=2, sort_keys=False))
    # print(json.dumps(contig_stats, indent=2, sort_keys=False))
    # dict_writer = csv.DictWriter(stat_output, fieldnames=None, delimiter='\t')
    # print(dict_writer)
    # df_raw = pd.DataFrame.from_dict(data, orient='index')
    # print(df_raw)
    # with open('statdata.txt', 'w') as outfile:
    # json.dump(scaffold_stats, outfile)
    # with open('contigdata.txt', 'w') as out2file:
    # json.dump(contig_stats, out2file)
    # scaff = csv.writer(open(naming + "_scaffold_stats.tsv", "w"), delimiter='\t')
    # for key, val in scaffold_stats.items():
    # scaff.writerow([key, val])
    #
    # contig = csv.writer(open(naming + "_contig_stats.tsv", "w"), delimiter='\t')
    # for key, val in contig_stats.items():
    #
    # Scaffold-level report -> argv[5].
    with open(sys.argv[5], 'w') as outputfile:
        # # print('#' + libraryName, file=outputfile)
        # # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile)
        # # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile)
        # # print("Successfully Processed reads (Paired): " + successfull + " ( "+str(successfull_perc)+"%)", file=outputfile)
        print(df_scaffold_top.to_string(index=False), file=outputfile)
        print("", file=outputfile)
        print(df_scaffold_N_NG.to_string(index=False), file=outputfile)
        print("", file=outputfile)
        print(df_scaffold_L_LG.to_string(index=False), file=outputfile)
    # Contig-level report -> argv[6].
    with open(sys.argv[6], 'w') as outputfile2:
        # # print('#' + libraryName, file=outputfile)
        # # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile)
        # # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile)
        # # print("Successfully Processed reads (Paired): " + successfull + " ( "+str(successfull_perc)+"%)", file=outputfile)
        print(df_contig_top.to_string(index=False), file=outputfile2)
        print("", file=outputfile2)
        print(df_contig_N_NG.to_string(index=False), file=outputfile2)
        print("", file=outputfile2)
        print(df_contig_L_LG.to_string(index=False), file=outputfile2)
# with open(sys.argv[4], 'w') as outputRst:
# print(tabulate(df_scaffold_top, headers='keys',tablefmt="rst", showindex=False), file=outputRst)
# print("", file=outputRst)
# print(tabulate(df_scaffold_N_NG, headers='keys',tablefmt="rst", showindex=False), file=outputRst)
# print("", file=outputRst)
# print(tabulate(df_scaffold_L_LG, headers='keys',tablefmt="rst", showindex=False), file=outputRst)
# print("", file=outputRst)
# #
# with open(sys.argv[5], 'w') as outputRst2:
# print(tabulate(df_contig_top, headers='keys',tablefmt="rst", showindex=False), file=outputRst2)
# print("", file=outputRst2)
# print(tabulate(df_contig_N_NG, headers='keys',tablefmt="rst", showindex=False), file=outputRst2)
# print("", file=outputRst2)
# print(tabulate(df_contig_L_LG, headers='keys',tablefmt="rst", showindex=False), file=outputRst2)
# print("", file=outputRst2)
# with open(sys.argv[4], 'w') as outputRst:
# print(tabulate(df_scaffold_top, headers='keys',tablefmt="pipe", showindex=False), file=outputRst)
# print("", file=outputRst)
# print(tabulate(df_scaffold_N_NG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst)
# print("", file=outputRst)
# print(tabulate(df_scaffold_L_LG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst)
# print("", file=outputRst)
# #
# with open(sys.argv[5], 'w') as outputRst2:
# print(tabulate(df_contig_top, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2)
# print("", file=outputRst2)
# print(tabulate(df_contig_N_NG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2)
# print("", file=outputRst2)
# print(tabulate(df_contig_L_LG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2)
# print("", file=outputRst2)
# list_of_dfs=[df_scaffold_top,df_scaffold_N_NG,df_scaffold_L_LG]
# for df in list_of_dfs:
# with open('all_dfs.tsv','a') as f:
# df.to_csv(f)
# f.write("\n")
# with open("testok.tsv", 'w') as outputfile:
# # print('#' + libraryName, file=outputfile)
# # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile)
# # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile)
# # print("Successfully Processed reads (Paired): " + successfull + " ( "+str(successfull_perc)+"%)", file=outputfile)
# print(tabulate(loadinTop, headers='keys',tablefmt="rst", showindex=False), file=outputfile)
# # print('', file=outputfile)
# print(tabulate(result, headers='keys', tablefmt="psql", showindex=False), file=outputfile)
# print('', file=outputfile)
Gaps_per_Gb Scaff_NG50_Mb Cont_NG50_Mb QV Completeness Comp_Single_BUSCOs_%
'< 200' '> 100Mbp' '> 10Mbp' '> 50' '> 95%' '> 95%'
'200 - 1000' '10Mbp - 100Mbp' '1Mbp - 10Mbp' '40 - 50' '90% - 95%' '90% - 95%'
'1000 - 10000' '0.1Mbp - 10Mbp' '0.01Mbp - 1Mbp' '35 - 40' '80% - 90%' '80% - 90%'
'> 10000' '< 0.1Mbp' '< 0.01Mbp' '< 35' '< 80%' '< 80%'
# Render the key assembly statistics as a colour-coded HTML table
# (formattable) with a colour legend, written to the snakemake output
# file via sink().  Colour bands follow the legend table: purple = best,
# green, orange, red = worst; contig-level assemblies are greyed out for
# the gap statistic.
suppressMessages(library(dplyr))
suppressMessages(library(formattable))
#suppressMessages(library(data.table))
fullTableOfStats<-read.table(file = snakemake@input[[1]], sep = "", header = TRUE, row.names=NULL, check.names = FALSE)
# NOTE(review): dimensionally this computes gaps x gigabases, not gaps per
# gigabase (that would be Gaps / (Est_Size_Mb / 1000)) — confirm intent.
fullTableOfStats$Gaps_per_Gb <- ((fullTableOfStats$Gaps * fullTableOfStats$Est_Size_Mb)/1000)
fullTableOfStats$Gaps_per_Gb <- as.integer(fullTableOfStats$Gaps_per_Gb)
customPurple='#c699e8'
customGreen='#8fc773'
selectionOfStats_colouredHeatmap <- fullTableOfStats %>%
  select(c('ASM_ID','ASM_LEVEL',
           'Gaps_per_Gb', 'Scaff_NG50_Mb',
           'Cont_NG50_Mb','QV',
           'Completeness', 'Comp_Single_BUSCOs_%'))
# Redirect the HTML emitted by format_table into the output file.
sink(file = snakemake@output[[1]])
format_table(selectionOfStats_colouredHeatmap,
  align =c("l","c","c","c","c", "c", "c", "c", "c"),
  list(Gaps_per_Gb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        # Grey out contig-level assemblies (no gaps by definition).
                        `background-color` = ifelse(ASM_LEVEL == 'cont', "#666666",
                                             ifelse(between(Gaps_per_Gb,1001, 10000), "#FFCC99",
                                             ifelse(between(Gaps_per_Gb,201 , 1000), customGreen,
                                             ifelse(Gaps_per_Gb <= 200, customPurple, "#FF9999")))))),
       Scaff_NG50_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(between(Scaff_NG50_Mb,0.1, 9.99999), "#FFCC99",
                                             ifelse(between(Scaff_NG50_Mb,10, 99.99999), customGreen,
                                             ifelse(Scaff_NG50_Mb >= 100, customPurple, "#FF9999"))))),
       Cont_NG50_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(between(Cont_NG50_Mb,0.01, 0.99999), "#FFCC99",
                                             ifelse(between(Cont_NG50_Mb,1, 9.999999), customGreen,
                                             ifelse(Cont_NG50_Mb >= 10, customPurple, "#FF9999"))))),
       Completeness = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(between(Completeness,80, 89.9999), "#FFCC99",
                                             ifelse(between(Completeness,90, 94.99999), customGreen,
                                             ifelse(Completeness >= 95, customPurple, "#FF9999"))))),
       `Comp_Single_BUSCOs_%` = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(between(`Comp_Single_BUSCOs_%`,80, 89.9999), "#FFCC99",
                                             ifelse(between(`Comp_Single_BUSCOs_%`,90, 95), customGreen,
                                             ifelse(`Comp_Single_BUSCOs_%` >= 95, customPurple, "#FF9999"))))),
       QV = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(between(QV,35, 39.9999999999), "#FFCC99",
                                             ifelse(between(QV,40, 49.999999999), customGreen,
                                             ifelse(QV >= 50, customPurple, "#FF9999")))))))
cat('<br>')
cat("\n")
cat('<br>')
# Legend table: maps each colour band back to its threshold range.
legendTable<-read.table(file = snakemake@params[[1]], sep = "", header = TRUE, row.names=NULL, check.names = FALSE)
format_table(legendTable,
  align =c("c","c","c", "c", "c", "c", "c"),
  list(Gaps_per_Gb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        "font.size" = "12px",
                        "width" = '150px',
                        `background-color` = ifelse(Gaps_per_Gb == '> 10000',"#FF9999",
                                             ifelse(Gaps_per_Gb == '1000 - 10000',"#FFCC99",
                                             ifelse(Gaps_per_Gb == '200 - 1000',customGreen, customPurple))))),
       Scaff_NG50_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        "font.size" = "12px",
                        "width" = '150px',
                        `background-color` = ifelse(Scaff_NG50_Mb == '< 0.1Mbp',"#FF9999",
                                             ifelse(Scaff_NG50_Mb == '0.1Mbp - 10Mbp',"#FFCC99",
                                             ifelse(Scaff_NG50_Mb == '10Mbp - 100Mbp',customGreen, customPurple))))),
       Cont_NG50_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        "font.size" = "12px",
                        "width" = '150px',
                        `background-color` = ifelse(Cont_NG50_Mb == '< 0.01Mbp',"#FF9999",
                                             ifelse(Cont_NG50_Mb == '0.01Mbp - 1Mbp',"#FFCC99",
                                             ifelse(Cont_NG50_Mb == '1Mbp - 10Mbp',customGreen, customPurple))))),
       QV = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        "font.size" = "12px",
                        "width" = '100px',
                        `background-color` = ifelse(QV == '< 35',"#FF9999",
                                             ifelse(QV == '35 - 40',"#FFCC99",
                                             ifelse(QV == '40 - 50',customGreen, customPurple))))),
       Completeness = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        "font.size" = "12px",
                        "width" = '150px',
                        `background-color` = ifelse(Completeness == '< 80%',"#FF9999",
                                             ifelse(Completeness == '80% - 90%',"#FFCC99",
                                             ifelse(Completeness == '90% - 95%',customGreen, customPurple))))),
       `Comp_Single_BUSCOs_%` = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        "font.size" = "12px",
                        "width" = '220px',
                        `background-color` = ifelse(`Comp_Single_BUSCOs_%` == '< 80%',"#FF9999",
                                             ifelse(`Comp_Single_BUSCOs_%` == '80% - 90%',"#FFCC99",
                                             ifelse(`Comp_Single_BUSCOs_%` == '90% - 95%',customGreen, customPurple)))))))
sink(file = NULL)
# Render the full statistics table as a within-column colour-gradient
# heatmap (color_tile min..max per column) for comparing assemblies
# against each other, plus a Max/Min legend, written via sink().
suppressMessages(library(dplyr))
suppressMessages(library(formattable))
fullTableOfStats<-read.table(file = snakemake@input[[1]], sep = "", header = TRUE, row.names=NULL, check.names = FALSE)
# NOTE(review): dimensionally this computes gaps x gigabases, not gaps per
# gigabase (that would be Gaps / (Est_Size_Mb / 1000)) — confirm intent.
fullTableOfStats$Gaps_per_Gb <- ((fullTableOfStats$Gaps * fullTableOfStats$Est_Size_Mb)/1000)
fullTableOfStats$Gaps_per_Gb <- as.integer(fullTableOfStats$Gaps_per_Gb)
selectionOfStats_internalComparisonHeatmap <- fullTableOfStats %>%
  select(c('ASM_ID','ASM_LEVEL', 'Bases_Mb', 'Het_%','GC_%', 'Gaps_per_Gb', 'Scaff', 'Cont',
           'Longest_Scaff_Mb','Scaff_NG50_Mb', 'Scaff_NG95_Mb',
           'Longest_Cont_Mb', 'Cont_NG50_Mb','Cont_NG95_Mb',
           'QV', 'Completeness',
           'Comp_BUSCOs_%','Comp_Single_BUSCOs_%'))
customBlue_max = "#2e96ff"
customBlue_min = "#dcecfc"
customGray_max = "#8c8c8c"
customGray_min = "#e6e6e6"
sink(file = snakemake@output[[1]])
# Gradient direction encodes "better": counts (Gaps, Scaff, Cont) run
# max->min so smaller is darker-best; size/quality columns run min->max.
format_table(selectionOfStats_internalComparisonHeatmap,
  align =c("l","c","c","c","c", "c", "c", "c", "c"),
  list(Bases_Mb = color_tile(customGray_min, customGray_max),
       `Het_%` = color_tile(customGray_min, customGray_max),
       `GC_%` = color_tile(customGray_min, customGray_max),
       Gaps_per_Gb = color_tile(customBlue_max, customBlue_min),
       Scaff = color_tile(customBlue_max,customBlue_min),
       Cont = color_tile(customBlue_max,customBlue_min),
       Longest_Scaff_Mb = color_tile(customBlue_min, customBlue_max),
       Scaff_NG50_Mb = color_tile(customBlue_min, customBlue_max),
       Scaff_NG95_Mb = color_tile(customBlue_min, customBlue_max),
       Longest_Cont_Mb = color_tile(customBlue_min, customBlue_max),
       Cont_NG50_Mb = color_tile(customBlue_min, customBlue_max),
       Cont_NG95_Mb = color_tile(customBlue_min, customBlue_max),
       QV = color_tile(customBlue_min, customBlue_max),
       Completeness = color_tile(customBlue_min, customBlue_max),
       `Comp_BUSCOs_%` = color_tile(customBlue_min, customBlue_max),
       `Comp_Single_BUSCOs_%` = color_tile(customBlue_min, customBlue_max)))
cat('<br>')
cat("\n")
cat('<br>')
# Legend table: one 'Max' row and one 'Min' row per column.
legendTable<-read.table(file = snakemake@params[[1]], sep = "", header = TRUE, row.names=NULL, check.names = FALSE)
format_table(legendTable,
  align =c("c","c","c", "c", "c", "c", "c","c","c","c", "c", "c", "c", "c","c", "c"),
  list(Bases_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Bases_Mb == 'Max',customGray_max, customGray_min))),
       `Het_%` = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(`Het_%` == 'Max',customGray_max, customGray_min))),
       `GC_%` = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(`GC_%` == 'Max',customGray_max, customGray_min))),
       Gaps_per_Gb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Gaps_per_Gb == 'Min',customBlue_max, customBlue_min))),
       Scaff = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Scaff == 'Min',customBlue_max, customBlue_min))),
       Cont = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Cont == 'Min',customBlue_max, customBlue_min))),
       Longest_Scaff_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Longest_Scaff_Mb == 'Max',customBlue_max, customBlue_min))),
       Scaff_NG50_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Scaff_NG50_Mb == 'Max',customBlue_max, customBlue_min))),
       Scaff_NG95_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Scaff_NG95_Mb == 'Max',customBlue_max, customBlue_min))),
       Longest_Cont_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Longest_Cont_Mb == 'Max',customBlue_max, customBlue_min))),
       Cont_NG50_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Cont_NG50_Mb == 'Max',customBlue_max, customBlue_min))),
       Cont_NG95_Mb = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Cont_NG95_Mb == 'Max',customBlue_max, customBlue_min))),
       QV = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(QV == 'Max',customBlue_max, customBlue_min))),
       Completeness = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(Completeness == 'Max',customBlue_max, customBlue_min))),
       `Comp_BUSCOs_%` = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(`Comp_BUSCOs_%` == 'Max',customBlue_max, customBlue_min))),
       `Comp_Single_BUSCOs_%` = formatter("span",
         style = ~style(display = "block",
                        padding = "0 4px",
                        `border-radius` = "3px",
                        `background-color` = ifelse(`Comp_Single_BUSCOs_%` == 'Max',customBlue_max, customBlue_min)))))
sink(file = NULL)
Bases_Mb Het_% GC_% Gaps_per_Gb Scaff Cont Longest_Scaff_Mb Scaff_NG50_Mb Scaff_NG95_Mb Longest_Cont_Mb Cont_NG50_Mb Cont_NG95_Mb QV Completeness Comp_BUSCOs_% Comp_Single_BUSCOs_%
'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max' 'Max'
'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min' 'Min'
#!/usr/bin/perl
# Filter a SAM stream (STDIN -> STDOUT): for each read id, keep only the
# alignment whose 5' end is aligned; when no single such alignment can be
# chosen, re-emit the first record for the read with the unmapped flag
# (0x4) set instead.
use strict;
use warnings;
my $prev_id = "";
my @five;    # records whose 5' end of the read is aligned
my @three;   # records whose 3' end of the read is aligned (collected, unused here)
my @unmap;   # unmapped records (collected, unused here)
my @mid;     # records clipped on both ends (collected, unused here)
my @all;     # every record seen for the current read id, in input order
my $counter = 0;
while (<STDIN>){
    chomp;
    # Pass SAM header lines straight through.
    if (/^@/){
        print $_."\n";
        next;
    }
    my ($id, $flag, $chr_from, $loc_from, $mapq, $cigar, $d1, $d2, $d3, $read, $read_qual, @rest) = split /\t/;
    # Reverse the 32-bit binary string so index i holds FLAG bit 2^i
    # ($binary[2] = 0x4 unmapped, $binary[4] = 0x10 reverse strand, per SAM spec).
    my $bin = reverse(dec2bin($flag));
    my @binary = split(//,$bin);
    if ($prev_id ne $id && $prev_id ne ""){
        # New read id: flush the decision for the previous read id.
        if ($counter == 1){
            if (@five == 1){
                print $five[0]."\n";
            }
            else{
                # Single record but not a clean 5' alignment: emit it with
                # the unmapped bit (0x4) forced on.
                my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split /\t/, $all[0];
                my $bin_1 = reverse(dec2bin($flag_1));
                my @binary_1 = split(//,$bin_1);
                $binary_1[2] = 1;
                my $bin_1_new = reverse(join("",@binary_1));
                my $flag_1_new = bin2dec($bin_1_new);
                print(join("\t",$id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1)."\n");
            }
        }
        elsif ($counter == 2 && @five == 1){
            # Two records with exactly one clean 5' alignment: keep it.
            print $five[0]."\n";
        }
        else{
            # Ambiguous: mark the first record as unmapped and emit it.
            my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split /\t/, $all[0];
            my $bin_1 = reverse(dec2bin($flag_1));
            my @binary_1 = split(//,$bin_1);
            $binary_1[2] = 1;
            my $bin_1_new = reverse(join("",@binary_1));
            my $flag_1_new = bin2dec($bin_1_new);
            print(join("\t",$id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1)."\n");
        }
        # Reset per-read accumulators for the new read id.
        $counter = 0;
        undef @unmap;
        undef @five;
        undef @three;
        undef @mid;
        undef @all;
    }
    $counter++;
    $prev_id = $id;
    push @all,$_;
    # Classify this record by which end of the read is aligned:
    # forward strand with leading match (or reverse with trailing match)
    # means the 5' end of the read is aligned.
    if ($binary[2]==1){
        push @unmap,$_;
    }
    elsif ($binary[4]==0 && $cigar =~ m/^[0-9]*M/ || $binary[4]==1 && $cigar =~ m/.*M$/){
        push @five, $_;
    }
    elsif ($binary[4]==1 && $cigar =~ m/^[0-9]*M/ || $binary[4]==0 && $cigar =~ m/.*M$/){
        push @three, $_;
    }
    elsif ($cigar =~ m/^[0-9]*[HS].*M.*[HS]$/){
        push @mid, $_;
    }
}
# End of input: flush the final read id (same decision logic as above).
if ($counter == 1){
    if (@five == 1){
        print $five[0]."\n";
    }
    else{
        my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split /\t/, $all[0];
        my $bin_1 = reverse(dec2bin($flag_1));
        my @binary_1 = split(//,$bin_1);
        $binary_1[2] = 1;
        my $bin_1_new = reverse(join("",@binary_1));
        my $flag_1_new = bin2dec($bin_1_new);
        print(join("\t",$id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1)."\n");
    }
}
elsif ($counter == 2 && @five == 1){
    print $five[0]."\n";
}
else{
    my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split /\t/, $all[0];
    my $bin_1 = reverse(dec2bin($flag_1));
    my @binary_1 = split(//,$bin_1);
    $binary_1[2] = 1;
    my $bin_1_new = reverse(join("",@binary_1));
    my $flag_1_new = bin2dec($bin_1_new);
    print(join("\t",$id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1)."\n");
}
# Convert a non-negative decimal SAM flag to its 32-character binary string.
sub dec2bin {
    return sprintf("%032b", shift);
}

# Convert a binary string (up to 32 bits) back to its decimal value.
sub bin2dec {
    my $bits = substr("0" x 32 . shift, -32);
    return oct("0b" . $bits);
}
#!/usr/bin/perl
# Combine two single-end BAMs (read 1 and read 2 of Hi-C style pairs,
# streamed through `samtools view -h`) into properly flagged paired-end
# SAM records on STDOUT.  Pairs where either mate is unmapped or has
# MAPQ < 10 are dropped.
use strict;
MAIN : {
    my ($read1_bam, $read2_bam) = @ARGV;
    if ((not defined $read1_bam) ||
        (not defined $read2_bam)) {
        die ("Usage: ./two_read_bam_combiner.pl <read 1 bam> <read 2 bam>\n");
    }
    open(FILE1,"samtools view -h $read1_bam |");
    open(FILE2,"samtools view -h $read2_bam |");
    my $line1 = <FILE1>;
    my $line2 = <FILE2>;
    my $counter = 0;
    my $new_counter = 0;
    while (defined $line1) {
        ## the line directly below this needed to be modified slightly from the genotyping pipeline ##
        # Header: @SQ lines must match between the two inputs; print once.
        if ($line1 =~ /^(\@)SQ/){
            if ($line1 ne $line2){print $line1;print $line2; die ("inconsistent bam header.");}
            else{
                print $line1;
            }
            $line1 = <FILE1>;
            $line2 = <FILE2>;
            next;
        }
        $counter++;
        # Progress marker on STDERR every million record pairs.
        if ($counter == ($new_counter + 1000000)) {
            print STDERR $counter . "\n";
            $new_counter = $counter;
        }
        chomp $line1;
        chomp $line2;
        my ($id1, $flag1, $chr_from1, $loc_from1, $mapq1, $cigar1, $d1_1, $d2_1, $d3_1, $read1, $read_qual1, @rest1) = split(/\t/,$line1);
        my ($id2, $flag2, $chr_from2, $loc_from2, $mapq2, $cigar2, $d1_2, $d2_2, $d3_2, $read2, $read_qual2, @rest2) = split(/\t/,$line2);
        # The two files must be name-sorted identically.
        if ($id1 ne $id2) {
            die ("The id's of the two files do not match up at line number $counter\n");
        }
        # Reversed binary strings: index i holds FLAG bit 2^i
        # (index 2 = 0x4 unmapped, index 4 = 0x10 reverse strand).
        my $bin1 = reverse(dec2bin($flag1));
        my $bin2 = reverse(dec2bin($flag2));
        my @binary1 = split(//,$bin1);
        my @binary2 = split(//,$bin2);
        # A pair is discarded when either mate is unmapped or low quality.
        my $trouble = 0;
        if (($binary1[2] == 1) || ($mapq1 < 10)) {
            $trouble = 1;
        }
        if (($binary2[2]== 1) || ($mapq2 < 10)) {
            $trouble = 1;
        }
        my $proper_pair1;
        my $proper_pair2;
        my $dist1;
        my $dist2;
        # Both mates mapped: mark proper pair and compute signed template
        # distance (TLEN); 0 when the mates land on different references.
        if (($binary1[2] == 0) && ($binary2[2] == 0)) {
            $proper_pair1 = 1;
            $proper_pair2 = 1;
            if ($chr_from1 eq $chr_from2) {
                my $dist = abs($loc_from1 - $loc_from2);
                if ($loc_from1 >= $loc_from2) {
                    $dist1 = -1*$dist;
                    $dist2 = $dist;
                } else {
                    $dist1 = $dist;
                    $dist2 = -1*$dist;
                }
            } else {
                $dist1 = 0;
                $dist2 = 0;
            }
        } else {
            $proper_pair1 = 0;
            $proper_pair2 = 0;
            $dist1 = 0;
            $dist2 = 0;
        }
        # Rebuild paired-end FLAG strings bit by bit (MSB first): keep the
        # duplicate/QC bits, set paired + first/second-in-pair, and copy
        # each mate's strand/unmapped state into the partner's mate bits.
        my $new_bin1 = join("","000000000000000000000",$binary1[10],$binary1[9],"0","0","1",$binary2[4],$binary1[4],$binary2[2],$binary1[2],$proper_pair1,"1");
        my $new_bin2 = join("","000000000000000000000",$binary2[10],$binary2[9],"0","1","0",$binary1[4],$binary2[4],$binary1[2],$binary2[2],$proper_pair2,"1");
        my $new_flag1 = bin2dec($new_bin1);
        my $new_flag2 = bin2dec($new_bin2);
        unless ($trouble == 1) {
            print(join("\t",$id1,$new_flag1,$chr_from1,$loc_from1,$mapq1,$cigar1,$chr_from2,$loc_from2,$dist1,$read1,$read_qual1,@rest1) . "\n");
            print(join("\t",$id2,$new_flag2,$chr_from2,$loc_from2,$mapq2,$cigar2,$chr_from1,$loc_from1,$dist2,$read2,$read_qual2,@rest2) . "\n");
        }
        $line1 = <FILE1>;
        $line2 = <FILE2>;
    }
}
# Decimal SAM flag -> 32-character binary string.
sub dec2bin {
    return sprintf("%032b", shift);
}

# Binary string (up to 32 bits) -> decimal value.
sub bin2dec {
    my $bits = substr("0" x 32 . shift, -32);
    return oct("0b" . $bits);
}
import pandas as pd
from tabulate import tabulate

# Build the combined per-assembly results table: read the aggregated stats
# (one assembly per column), transpose to one assembly per row, convert the
# base-pair columns to megabases, strip '%' signs, round, and write the table
# both as pandoc-markdown (landscape page) and as plain text.
results = pd.read_csv(snakemake.input.results, dtype=str, index_col=0, delim_whitespace=True, skip_blank_lines=True)
# samples=samples.reset_index

# Columns converted from bp to Mb (values arrive as strings).
turn2FloatAndMb = ['Bases_Mb', 'Est_Size_Mb', 'Longest_Scaff_Mb', 'Scaff_N50_Mb', 'Scaff_NG50_Mb', 'Scaff_N95_Mb', 'Scaff_NG95_Mb', 'Longest_Cont_Mb', 'Cont_N50_Mb', 'Cont_NG50_Mb', 'Cont_N95_Mb', 'Cont_NG95_Mb']
# Columns rounded to two decimals; the first four carry a '%' suffix that is
# removed before the float conversion.
roundDecimals = ['Comp_BUSCOs_%', 'Comp_Single_BUSCOs_%', 'Het_%', 'GC_%', 'QV', 'Completeness', 'Bases_Mb', 'Est_Size_Mb', 'Longest_Scaff_Mb', 'Scaff_N50_Mb', 'Scaff_NG50_Mb', 'Scaff_N95_Mb', 'Scaff_NG95_Mb', 'Longest_Cont_Mb', 'Cont_N50_Mb', 'Cont_NG50_Mb', 'Cont_N95_Mb', 'Cont_NG95_Mb']

print('this is samples', results)
transposed = results.T
print('this is sampleTransposed', transposed)

for column in turn2FloatAndMb:
    transposed[column] = transposed[column].astype(float) / 1000000

for column in roundDecimals[:4]:
    transposed[column] = transposed[column].str.replace('%', '')

for column in roundDecimals:
    transposed[column] = transposed[column].astype(float).round(2)

table = transposed.rename_axis('ASM_ID')

# Markdown output, wrapped in the \blandscape/\elandscape LaTeX macros that
# the report's pandoc header defines.
with open(snakemake.output[0], "w") as out_markdown:
    markdown_lines = [
        "",
        "\\blandscape",
        "",
        "\\tiny",
        tabulate(table, headers='keys', tablefmt="pipe", showindex=True),
        "\\elandscape",
        "",
    ]
    out_markdown.write("\n".join(markdown_lines) + "\n")

# Plain-text rendering of the same table.
with open(snakemake.output[1], "w") as out_plain:
    out_plain.write(tabulate(table, headers='keys', tablefmt="plain", showindex=True) + "\n")
import os
import numpy as np
import pandas as pd
from tabulate import tabulate

# ---------------------------------------------------------------------------
# Collect the inputs for the "GEP Quick-View" report. All tables are read
# with dtype=str because they are only reformatted via tabulate, never
# computed on.
# ---------------------------------------------------------------------------

# Aggregated key statistics; the final four rows are dropped.
key_stats_table = pd.read_csv(snakemake.input[0], dtype=str, delim_whitespace=True, skip_blank_lines=True).iloc[:-4, :]

# Genomescope2 profile images (linear- and log-scale plots).
genomescope_linear = snakemake.input[1]
genomescope_log = snakemake.input[2]

# Merqury QV score table; the trailing 'Error' column is not displayed.
QV_score_table = (
    pd.read_csv(snakemake.input[3], dtype=str, delim_whitespace=True, skip_blank_lines=True,
                names=["assembly_name", "Assembly_Only", "Total_Kmers", "QV_Score", "Error"])
    .drop(['Error'], axis=1)
    .set_index(['assembly_name'])
)

# Merqury k-mer completeness table; the 'all' column is dropped.
kmer_completeness_table = (
    pd.read_csv(snakemake.input[4], dtype=str, delim_whitespace=True, skip_blank_lines=True,
                names=["assembly_name", "all", "Kmers_Assembly", "Kmers_Reads", "%"])
    .drop(["all"], axis=1)
    .set_index(['assembly_name'])
)

# BUSCO score summary: keep each line verbatim minus trailing whitespace.
with open(snakemake.input[5]) as busco_scores:
    lines = [raw.rstrip() for raw in busco_scores]

# Merqury copy-number spectra plots (.fl = flat, .st = stacked variants).
kmer_flat = snakemake.input[6]
kmer_stacked = snakemake.input[7]

# Rule parameters: assembly display name, meryl k-mer size, BUSCO lineage db.
params_assemblyID = snakemake.params[0]
params_merylKmer = snakemake.params[1]
params_buscoDB = snakemake.params[2]
# Emit the pandoc-markdown source of the two-column "GEP Quick-View" report.
# NOTE: multi-argument print() inserts a single space between arguments, so
# image lines come out as "![]( path ){ width=37% }" — the spaces inside the
# parentheses are produced by the default print separator.
with open(snakemake.output[0], 'w') as outFile:
    # print("---", "title: 'GEP Quick-View'", "author: James Sullivan", "date: 11/09/2021", "geometry: margin=2cm", "classoption: table", "documentclass: extarticle", "urlcolor: blue", "colorlinks: true", "header-includes: |", " \\rowcolors{2}{gray!10}{gray!25}" , " ```{=latex}", "---", sep="\n")
    print("", file=outFile)
    # Layout directives consumed by LaTeX via the report's pandoc header.
    print("\\twocolumn", file=outFile)
    print("\\Large", file=outFile)
    print("# Assembly:",params_assemblyID, file=outFile)
    print("", file=outFile)
    print(" - db built with kmer size ", params_merylKmer, "bp", file=outFile)
    print("", file=outFile)
    # Key genome statistics table (from the aggregated results TSV).
    print("### Genome Statistics", file=outFile)
    print("", file=outFile)
    print(tabulate(key_stats_table, headers='keys',tablefmt="pipe", showindex=False), file=outFile)
    print("", file=outFile)
    # Genomescope2 profile plots, linear and log scale.
    print("### Genomescope2 Profile (Linear)", file=outFile)
    print("![](", genomescope_linear, "){ width=37% }", file=outFile)
    print("", file=outFile)
    print("### Genomescope2 Profile (Log)", file=outFile)
    print("![](", genomescope_log, "){ width=37% }", file=outFile)
    # A lone "\" is a pandoc/LaTeX line break; \pagebreak starts column 2.
    print("\\", file=outFile)
    print("\\pagebreak", file=outFile)
    print("\\", file=outFile)
    print("", file=outFile)
    print("\\large", file=outFile)
    # Merqury QV score table.
    print("### QV Score", file=outFile)
    print(tabulate(QV_score_table, headers='keys',tablefmt="pipe", showindex=True), file=outFile)
    print("", file=outFile)
    print("\\large", file=outFile)
    # Merqury k-mer completeness table.
    print("### Kmer Completeness", file=outFile)
    print(tabulate(kmer_completeness_table, headers='keys',tablefmt="pipe", showindex=True), file=outFile)
    print("", file=outFile)
    print("\\Large", file=outFile)
    # BUSCO score block, wrapped in a fenced code block to keep alignment.
    print("### BUSCOv5 (database: ", params_buscoDB, ")", file=outFile)
    print("```", file=outFile)
    for line in lines:
        print(line, file=outFile)
    print("```", file=outFile)
    print("\\", file=outFile)
    print("\\large", file=outFile)
    print("", file=outFile)
    # NOTE(review): this header says "(Stacked)" but embeds kmer_flat (the
    # .fl "flat" spectra plot) — the label likely should read "(Flat)";
    # confirm against the rendered report before changing the string.
    print("### K-mer Multiplicity PRI Only (Stacked)", file=outFile)
    print("![](", kmer_flat, "){ width=44% }", file=outFile)
    print("\\", file=outFile)
    print("", file=outFile)
    print("### K-mer Multiplicity PRI + ALT (Stacked)", file=outFile)
    print("![](", kmer_stacked, "){ width=44% }", file=outFile)
    print("\\", file=outFile)
    print("\\pagebreak", file=outFile)
# ---
# title: "GEP Quick-View"
# date: 11/09/2021
# geometry: a4paper
# classoption: table
# documentclass: extarticle
# urlcolor: blue
# #pdf_document: null
# colorlinks: true
# header-includes: |
#   \rowcolors{2}{gray!10}{gray!25}
#   ```{=latex}
#   \usepackage{pdflscape}
#   \usepackage{longtable}
#   \newcommand{\blandscape}{\begin{landscape}}
#   \newcommand{\elandscape}{\end{landscape}}
#   ```
# output: pdf_document
# ---
#
# print("---", "title: 'GEP Quick-View'", "author: James Sullivan", "date: 11/09/2021", "geometry: margin=2cm", "classoption: table", "documentclass: extarticle", "urlcolor: blue", "colorlinks: true", "header-includes: |", " \\rowcolors{2}{gray!10}{gray!25}" , " ```{=latex}", "" sep="\n")
import shutil

# Copy the Pretext HiC heatmap into the key-results folder.
shutil.copyfile(snakemake.input.PretextMap, snakemake.output.pretextCP2keyResults)

# Emit a small markdown section embedding the heatmap image. The double space
# after the assembly name and the spaces inside "![]( ... )" reproduce the
# original multi-argument print() output byte-for-byte.
heatmap = snakemake.input.PretextMap
md_lines = [
    "",
    f"### {snakemake.params.assemblyName}  HiC Heatmap",
    f"![]( {heatmap} ){{ height=30% }}",
]
with open(snakemake.output.IndividualPretextMD, "w") as md:
    md.write("\n".join(md_lines) + "\n")
---
geometry: b3paper, margin=1.3cm
classoption:
- table
- twocolumn
urlcolor: blue
colorlinks: true
header-includes:
- |
```{=latex}
\rowcolors{2}{gray!10}{gray!25}
\usepackage{longtable}
\usepackage{supertabular}
\let\longtable\supertabular
\let\endlongtable\endsupertabular
\let\endhead\relax
```
output: pdf_document
---
<style>
.main-container { width: 1200px; max-width:2800px;}
</style>
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment