From 1fb5c77aea88d4e2f3525ab4e470ab9937b37bcb Mon Sep 17 00:00:00 2001 From: james94 <james94@mi.fu-berlin.de> Date: Tue, 11 May 2021 18:46:49 +0200 Subject: [PATCH] Handle pre-built --- Snakefile | 6 ++++-- rules/rulesNew_short.smk | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Snakefile b/Snakefile index 82b0c19..c493bbe 100644 --- a/Snakefile +++ b/Snakefile @@ -32,10 +32,12 @@ if 'Library_R1' in samples.columns: samples = samples1.set_index('assemblyName') # print(samples) samples=samples.join(samples6['est_bp']) - print(list(samples.index)) + print("The following assemblies will be evaluated: ", list(samples.index)) elif 'preBuiltMerylDatabase' in samples.columns: - print(list(samples.index)) + samples['est_bp'] = samples['est_bp'].fillna(0) + samples["est_bp"] = pd.to_numeric(samples["est_bp"]) + print("The following assemblies will be evaluated: ", list(samples.index)) else: raise ValueError('Sample Sheet structure not recognised. Please make sure you are using the correct sample sheet (prebuilt or non-prebuilt meryl db, etc)') diff --git a/rules/rulesNew_short.smk b/rules/rulesNew_short.smk index f410e84..7447a93 100644 --- a/rules/rulesNew_short.smk +++ b/rules/rulesNew_short.smk @@ -239,7 +239,7 @@ rule merqury: report(os.path.join(config['Results'],"{sample}" + "/2_QVstats_merylAndMerqury/" + "{sample}" + "_merq.{sample}.spectra-cn.fl.png"), caption="../report/merqury/spectra-cn_filled.rst", category="Copy Number Spectra Plots", subcategory="{sample}"), os.path.join(config['Results'], "{sample}" +"/2_QVstats_merylAndMerqury/combinedRead1Read2.21.hist"), symlink_fasta=os.path.join(config['Results'], "{sample}" + "/{sample}.fasta"), - symlink_merylDB=os.path.join(config['Results'], "{sample}" + "/combinedRead1Read2.21.meryl") + symlink_merylDB=directory(os.path.join(config['Results'], "{sample}" + "/combinedRead1Read2.21.meryl")) log: os.path.join(config['Results'],"{sample}" + "/2_QVstats_merylAndMerqury/logs/{sample}_merqury.log") priority: @@ -272,7 +272,7 @@ rule genomescope2: check2=os.path.join(config['Results'],"{sample}" + "/2_QVstats_merylAndMerqury/" + "{sample}" + "_merq.{sample}.spectra-cn.st.png") params: outFolder=os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/"), - cpHist=os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/combinedRead1Read2.21.histo"), + cpHist=os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/combinedRead1Read2_10k.21.histo"), findGscope=os.path.join(workflow.basedir) output: summary=report(os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/" + "{sample}" + "_summary.txt"), caption="../report/genomescope/summary.rst", category="Genomescope Profile", subcategory="{sample}"), @@ -283,12 +283,12 @@ rule genomescope2: "../envs/genomescope.yaml" shell: """ - expand -t 1 {input.hist} > {params.cpHist} + head -n 10000 {input.hist} > {params.cpHist} export genomescope2script=$(find {params.findGscope} -name genomescope2) Rscript $genomescope2script -i {params.cpHist} -o {params.outFolder} -k 21 -n {wildcards.sample} grep "Genome Haploid Length" {output.summary} | awk {{'print $6'}} | sed 's/,//g'> {output.estimatedSize} """ - + # expand -t 1 {input.hist} > {params.cpHist} rule assemblyStats: input: @@ -438,7 +438,7 @@ rule aggregateAllAssemblies: """ cp {input.sampleSheet} {output.newSampleSheet} cp {input.config} {output.newConfigFile} - echo -e "AAssembly\nsize_estimate\nmax_heterozygosity\nqv_score\nScaffold_N50_length\nScaffold_NG50_length\nScaffold_N95_length\nScaffold_NG95_length\nScaffold_N100_length\nScaffold_NG100_length\nContig_N50_length\nContig_NG50_length\ntotal_num_bases\ntotal_num_scaffolds\ntotal_num_contigs\nnumber_of_gaps" | \ + echo -e "Assembly\nsize_estimate\nmax_heterozygosity\nqv_score\nScaffold_N50_length\nScaffold_NG50_length\nScaffold_N95_length\nScaffold_NG95_length\nScaffold_N100_length\nScaffold_NG100_length\nContig_N50_length\nContig_NG50_length\ntotal_num_bases\ntotal_num_scaffolds\ntotal_num_contigs\nnumber_of_gaps" | \ paste -d'\t' - {input.allResults} | \ awk -F'\t' '{{s="";for (i=1;i<=NF;i+=2) {{s=s?s FS $i:$i}} print s}}' | \ column -t > {output.results} -- GitLab