From 1fb5c77aea88d4e2f3525ab4e470ab9937b37bcb Mon Sep 17 00:00:00 2001
From: james94 <james94@mi.fu-berlin.de>
Date: Tue, 11 May 2021 18:46:49 +0200
Subject: [PATCH] Handle pre-built

---
 Snakefile                |  6 ++++--
 rules/rulesNew_short.smk | 10 +++++-----
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/Snakefile b/Snakefile
index 82b0c19..c493bbe 100644
--- a/Snakefile
+++ b/Snakefile
@@ -32,10 +32,12 @@ if 'Library_R1' in samples.columns:
 	samples = samples1.set_index('assemblyName')
 	# print(samples)
 	samples=samples.join(samples6['est_bp'])
-	print(list(samples.index))
+	print("The following assemblies will be evaluated: ", list(samples.index))
 
 elif 'preBuiltMerylDatabase' in samples.columns:
-	print(list(samples.index))
+	samples['est_bp'] = samples['est_bp'].fillna(0)
+	samples["est_bp"] = pd.to_numeric(samples["est_bp"])
+	print("The following assemblies will be evaluated: ", list(samples.index))
 else:
 	raise ValueError('Sample Sheet structure not recognised. Please make sure you are using the correct sample sheet (prebuilt or non-prebuilt meryl db, etc)')
 
diff --git a/rules/rulesNew_short.smk b/rules/rulesNew_short.smk
index f410e84..7447a93 100644
--- a/rules/rulesNew_short.smk
+++ b/rules/rulesNew_short.smk
@@ -239,7 +239,7 @@ rule merqury:
 		report(os.path.join(config['Results'],"{sample}" + "/2_QVstats_merylAndMerqury/" + "{sample}" + "_merq.{sample}.spectra-cn.fl.png"), caption="../report/merqury/spectra-cn_filled.rst", category="Copy Number Spectra Plots", subcategory="{sample}"),
 		os.path.join(config['Results'], "{sample}" +"/2_QVstats_merylAndMerqury/combinedRead1Read2.21.hist"),
 		symlink_fasta=os.path.join(config['Results'], "{sample}" + "/{sample}.fasta"),
-		symlink_merylDB=os.path.join(config['Results'], "{sample}" + "/combinedRead1Read2.21.meryl")
+		symlink_merylDB=directory(os.path.join(config['Results'], "{sample}" + "/combinedRead1Read2.21.meryl"))
 	log:
 		os.path.join(config['Results'],"{sample}" + "/2_QVstats_merylAndMerqury/logs/{sample}_merqury.log")
 	priority:
@@ -272,7 +272,7 @@ rule genomescope2:
 		check2=os.path.join(config['Results'],"{sample}" + "/2_QVstats_merylAndMerqury/" + "{sample}" + "_merq.{sample}.spectra-cn.st.png")
 	params:
 		outFolder=os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/"),
-		cpHist=os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/combinedRead1Read2.21.histo"),
+		cpHist=os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/combinedRead1Read2_10k.21.histo"),
 		findGscope=os.path.join(workflow.basedir)
 	output:
 		summary=report(os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/" + "{sample}" + "_summary.txt"), caption="../report/genomescope/summary.rst", category="Genomescope Profile", subcategory="{sample}"),
@@ -283,12 +283,12 @@ rule genomescope2:
 		"../envs/genomescope.yaml"
 	shell:
 		"""
-		expand -t 1 {input.hist} > {params.cpHist}
+		head -n 10000 {input.hist} > {params.cpHist}
 		export genomescope2script=$(find {params.findGscope} -name genomescope2)
 		Rscript $genomescope2script -i {params.cpHist} -o {params.outFolder} -k 21 -n {wildcards.sample}
 		grep "Genome Haploid Length" {output.summary} | awk {{'print $6'}} | sed 's/,//g'> {output.estimatedSize}
 		"""
-
+		# expand -t 1 {input.hist} > {params.cpHist}
 
 rule assemblyStats:
 	input:
@@ -438,7 +438,7 @@ rule aggregateAllAssemblies:
 		"""
 		cp {input.sampleSheet} {output.newSampleSheet}
 		cp {input.config} {output.newConfigFile}
-		echo -e "AAssembly\nsize_estimate\nmax_heterozygosity\nqv_score\nScaffold_N50_length\nScaffold_NG50_length\nScaffold_N95_length\nScaffold_NG95_length\nScaffold_N100_length\nScaffold_NG100_length\nContig_N50_length\nContig_NG50_length\ntotal_num_bases\ntotal_num_scaffolds\ntotal_num_contigs\nnumber_of_gaps" | \
+		echo -e "Assembly\nsize_estimate\nmax_heterozygosity\nqv_score\nScaffold_N50_length\nScaffold_NG50_length\nScaffold_N95_length\nScaffold_NG95_length\nScaffold_N100_length\nScaffold_NG100_length\nContig_N50_length\nContig_NG50_length\ntotal_num_bases\ntotal_num_scaffolds\ntotal_num_contigs\nnumber_of_gaps" | \
 		paste -d'\t' - {input.allResults} | \
 		awk -F'\t' '{{s="";for (i=1;i<=NF;i+=2) {{s=s?s FS $i:$i}} print s}}' | \
 		column -t > {output.results}
-- 
GitLab