diff --git a/SUBMIT_CONFIG/default/config.yaml b/SUBMIT_CONFIG/default/config.yaml index ee5b94e9a38e839b0d817b06dfee5ad4954e7065..d2f3f62f3edf0e834066716157fc758fd8bef020 100644 --- a/SUBMIT_CONFIG/default/config.yaml +++ b/SUBMIT_CONFIG/default/config.yaml @@ -1,5 +1,4 @@ cores: 64 -resources: mem_mb=600 dry-run: False use-conda: True latency-wait: 60 diff --git a/SUBMIT_CONFIG/slurm/config.yaml b/SUBMIT_CONFIG/slurm/config.yaml index 5e7fcf918b1fa9792ea682ba14901433b0ac7cc6..f2969ebcde976e68c4db9e6d80ca2a797d3942a9 100644 --- a/SUBMIT_CONFIG/slurm/config.yaml +++ b/SUBMIT_CONFIG/slurm/config.yaml @@ -1,34 +1,36 @@ -cluster-config: "cluster.yaml" +# cluster-config: "cluster.yaml" cluster: mkdir -p slurm_logs/{rule} && sbatch - --partition={cluster.partition} - --qos={cluster.qos} - --cpus-per-task={cluster.threads} - --mem={cluster.memory} - --job-name={cluster.jobname} + --partition=begendiv,main + --qos=standard + --cpus-per-task={threads} + --mem={resources.memory} + --job-name=GEP.{rule}.{wildcards}.%j --output=slurm_logs/{rule}/{rule}.{wildcards}.%j.out - --time={cluster.time} + --time={resources.time} #User Defines below parameters -# default-resources: -# - partition=begendiv -# - qos=standard -# - mem_mb=100000 -# - time="3-00:00:00" -# - nodes=1 +default-resources: + - partition=begendiv + - qos=standard + # - mem_mb=100000 + # - time="3-00:00:00" + # - nodes=1 + # restart-times: 3 # max-jobs-per-second: 100 max-status-checks-per-second: 10 latency-wait: 60 -# jobs: 500 +jobs: 10 keep-going: False rerun-incomplete: True printshellcmds: True scheduler: greedy use-conda: True +cores: 24 diff --git a/Snakefile b/Snakefile index 10eafcee9f5ce04f7113f76222399b6ddd455d18..b4c551258342e059707742728f1da26b9c800bca 100644 --- a/Snakefile +++ b/Snakefile @@ -2,6 +2,7 @@ import numpy as np from itertools import groupby import json import pandas as pd +import yaml import os import sys import csv @@ -11,6 +12,8 @@ import argparse, subprocess configfile: 
"configuration/config.yaml" +with open(config['resources'], 'r') as f: + resource = yaml.safe_load(f) def getBasename4Reads(path): base=os.path.basename(path) diff --git a/configuration/define_resources.yaml b/configuration/define_resources.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1243261d9fed40d264571cd6d3ec5be7991d8687 --- /dev/null +++ b/configuration/define_resources.yaml @@ -0,0 +1,360 @@ +# __default__: +# jobname: 'GEP.{rule}' +# partition: begendiv,main +# # nCPUs: "{threads}" +# qos: 'standard' +# + + +#### PRETEXT #### + +unzipHiC_R1: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 12000 + time: "00:30:05" + threads: 8 + +# symlinkUnzippedHiC_R1: +# jobname: GEP.{rule}.{wildcards.asmID} +# memory: 10 +# time: "00:05:00" +# threads: 1 + +unzipHiC_R2: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 12000 + time: "00:30:05" + threads: 8 + +# symlinkUnzippedHiC_R2: +# jobname: GEP.{rule}.{wildcards.asmID} +# memory: 10 +# time: "00:05:00" +# threads: 1 + + + +pretext_index_PRI_asm: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 72000 + time: "02:30:00" + threads: 2 + +pretext_fastq2bam_R1: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 32000 + time: "08:00:00" + threads: 12 + + +pretext_fastq2bam_R2: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 32000 + time: "08:00:00" + threads: 12 + +pretext_filter_5primeEnd_R1: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 200 + time: "08:00:00" + threads: 4 + +pretext_filter_5primeEnd_R2: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 200 + time: "08:00:00" + threads: 4 + + +pretext_filtered_combine: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 9500 + time: "08:30:00" + threads: 12 + +pretext_map: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 32000 + time: "08:30:00" + threads: 12 + +pretext_snapshot: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 500 + time: "00:10:00" + threads: 1 + +pretextMaps2md: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 500 + 
time: "00:10:00" + threads: 1 + + +unzipFasta_PRI: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 500 + time: "00:05:00" + threads: 4 + +# symlinkUnzippedFasta_PRI: +# jobname: GEP.{rule}.{wildcards.asmID} +# memory: 500 +# time: "00:05:00" +# threads: + + +unzipFasta_ALT: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 500 + time: "00:05:00" + threads: 14 + + +# +# symlinkUnzippedFasta_ALT: +# jobname: GEP.{rule}.{wildcards.asmID} +# memory: 500 +# time: "00:05:05" +# threads: 1 + + +merqury: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 40000 + time: "08:00:00" + threads: 12 + +busco5: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 150000 + time: "1-00:00:00" + threads: 16 + + + +# moveBuscoOutputs: +# jobname: GEP.{rule}.{wildcards.asmID} +# memory: 500 +# time: "00:05:05" +# threads: 1 + + + + + + + + +genomescope2: + jobname: GEP.{rule}.{wildcards.asmID}.{wildcards.kmer} + memory: 500 + time: "00:05:00" + threads: 1 + + +assemblyStats: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 500 + time: "00:05:00" + threads: 1 + +# saveConfiguration_and_getKeyValues_kmer: +# jobname: GEP.{rule}.{wildcards.asmID}.{wildcards.kmer} +# memory: 500 +# time: "00:05:00" +# threads: 1 +# +# +# saveConfiguration_and_getKeyValues: +# jobname: GEP.{rule}.{wildcards.asmID} +# memory: 500 +# time: "00:05:05" +# threads: 1 +# +# +# aggregateAllAssemblies: +# jobname: GEP.{rule} +# memory: 500 +# time: "00:05:05" +# threads: 1 + +# makeReport: +# jobname: GEP.{rule}.{wildcards.asmID}.{wildcards.kmer} +# memory: 500 +# time: "00:05:05" +# threads: 1 +# +# +# addFullTable: +# jobname: GEP.{rule} +# memory: 500 +# time: "00:05:05" +# threads: 1 +# +# aggregateReport: +# jobname: GEP.{rule} +# memory: 500 +# time: "00:05:05" +# threads: 1 + + +makePDF: + jobname: GEP.{rule} + memory: 1000 + time: "00:10:05" + threads: 1 + +###### Rules for illuminadb building ########## + +unzipFastq_R1: + jobname: 
GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.trim10x}.{wildcards.trimAdapters} + memory: 96000 + time: "00:30:05" + threads: 2 + + +unzipFastq_R2: + jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.trim10x}.{wildcards.trimAdapters} + memory: 96000 + time: "00:30:05" + threads: 4 + +# symlinkUnzippedFastq_R1: +# jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.trim10x}.{wildcards.trimAdapters} +# memory: 500 +# time: "00:01:35" +# threads: 1 +# +# symlinkUnzippedFastq_R2: +# jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.trim10x}.{wildcards.trimAdapters} +# memory: 500 +# time: "00:01:35" +# threads: 1 + + + + +# symLink_trim10xbarcodes_notrimAdapt: +# jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter} +# memory: 500 +# time: "00:01:35" +# threads: 1 +# +# symlinks_no10xOrAdaptTrim: +# jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter} +# memory: 500 +# time: "00:01:35" +# threads: 1 +# +# +# symlinks_no10xwithAdaptTrim: +# jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter} +# memory: 500 +# time: "00:01:35" +# threads: 1 +# +# +# symlink_trim10xbarcodesR2: +# jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.trimAdapters} +# memory: 500 +# time: "00:01:35" +# threads: 1 + + +trim10xbarcodes: + jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.trimAdapters} + memory: 500 + time: "04:00:05" + threads: 4 + + + +trimAdapters: + jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.trim10x} + memory: 500 + time: "04:00:05" + threads: 8 + + +fastqc_Illumina: + jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.trimAdapters}.{wildcards.trim10x} + memory: 1000 + time: "04:00:05" + threads: 4 + +# multiqc_hifi: +# jobname: GEP.{rule}.{wildcards.sample}.{wildcards.trimAdapters}.{wildcards.trim10x} +# memory: 500 +# time: "01:00:05" +# threads: 1 + 
+meryl_R1: + jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.trimAdapters}.{wildcards.trim10x}.{wildcards.kmer} + memory: 20000 + time: "01:00:05" + threads: 8 + +meryl_R2: + jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.trimAdapters}.{wildcards.trim10x}.{wildcards.kmer} + memory: 20000 + time: "01:00:05" + threads: 8 + +meryl_illumina_build: + jobname: GEP.{rule}.{wildcards.sample}.{wildcards.trimAdapters}.{wildcards.trim10x}.{wildcards.kmer} + memory: 30000 + time: "00:45:05" + threads: 8 + + + + +###### HIFI BUILD ####### + +unzipHifi: + # jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.smrtornot} + memory: 128000 + time: "00:30:05" + threads: 4 + +# symlinkUnzippedHifi: +# jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.smrtornot} +# memory: 500 +# time: "00:05:05" + +# symlinkfornotSmartTrimmed: +# jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter} +# memory: 500 +# time: "00:05:05" + + +fastqc_hifi: + # jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.smrtornot} + memory: 12000 + time: "04:00:05" + threads: 4 + +# multiqc_hifi: +# # jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.smrtornot} +# memory: 4000 +# time: "01:15:05" + +meryl_hifi_count: + # jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.smrtornot}.{wildcards.kmer} + memory: 96000 + time: "04:30:05" + threads: 16 + +meryl_hifi_build: + # jobname: GEP.{rule}.{wildcards.sample}.{wildcards.smrtornot}.{wildcards.kmer} + memory: 96000 + time: "04:30:05" + threads: 16 + + +trimSMRTbell: + jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter} + memory: 96000 + time: "04:30:05" + threads: 8 diff --git a/rules/hifi_05_11_21.smk b/rules/hifi_05_11_21.smk index 5aa4f544b01e6d87e7e84a99b90c506bb93a1a63..932b989b66c6a6cfb6f5e5a20314516f46e635bd 100644 --- a/rules/hifi_05_11_21.smk +++ b/rules/hifi_05_11_21.smk 
@@ -12,6 +12,9 @@ # def fq_to_trimSMRTbell(wildcards): # return samples.loc[(wildcards.sample, wildcards.readCounter), "hifi_reads"] +localrules: symlinkUnzippedHifi, symlinkfornotSmartTrimmed, multiqc_hifi + + def fq_to_trimSMRTbell(wildcards): return trimSMRTbell.loc[(wildcards.sample, wildcards.readCounter), "hifi_reads"] @@ -39,7 +42,10 @@ rule unzipHifi: conda: "../envs/pigz.yaml" threads: - 4 + resource['unzipHifi']['threads'] + resources: + memory=resource['unzipHifi']['memory'], + time=resource['unzipHifi']['time'], shell: """ pigz -p {threads} -c -d -k {input.fastq} > {output} 2> {log} @@ -52,8 +58,8 @@ rule symlinkUnzippedHifi: temp(os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/01_unzipFastqs/{readCounter}.{smrtornot}.fastq")), log: os.path.join(config['Results'], "0_buildDatabases/{sample}/logs/hifiReads/{readCounter}.{smrtornot}_pigzUnzip.log") - threads: - 1 + # threads: + # 1 shell: """ ln -s {input.fastq} {output} @@ -86,7 +92,10 @@ rule trimSMRTbell: # outputFile=os.path.join(config['Results'],"{sample}/0_preProcessing/01_trimHifi/{readCounter}_smrtTrimmed.fastq") outputFile=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/02_trimReads/{readCounter}.smrtTrimmed.fastq")) threads: - 8 + resource['trimSMRTbell']['threads'] + resources: + memory=resource['trimSMRTbell']['memory'], + time=resource['trimSMRTbell']['time'], log: os.path.join(config['Results'], "0_buildDatabases/{sample}/logs/hifiReads/{readCounter}_trimSMRTbell.log") priority: @@ -103,8 +112,8 @@ rule symlinkfornotSmartTrimmed: os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/01_unzipFastqs/{readCounter}.notsmrtTrimmed.fastq"), output: outputFile=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/02_trimReads/{readCounter}.notsmrtTrimmed.fastq")) - threads: - 1 + # threads: + # 1 shell: """ ln -s {input} {output.outputFile} @@ -118,7 +127,10 @@ rule fastqc_hifi: output: 
os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/05_multiqc/{readCounter}.{smrtornot}_fastqc.html") threads: - 1 + resource['fastqc_hifi']['threads'] + resources: + memory=resource['fastqc_hifi']['memory'], + time=resource['fastqc_hifi']['time'], log: os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/{readCounter}.{smrtornot}.FastQC.log") conda: @@ -137,8 +149,8 @@ rule multiqc_hifi: filename="{sample}.{smrtornot}.multiqcReport.html" output: os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/05_multiqc/{sample}.{smrtornot}.multiqcReport.html") - threads: - 1 + # threads: + # 1 log: os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/{sample}.{smrtornot}.multiqc.log") conda: @@ -158,7 +170,10 @@ rule meryl_hifi_count: kmer = "{kmer}" # path= os.path.join(config['Results'], "{sample}" +"/2_QVstats_merylAndMerqury/") threads: - 16 + resource['meryl_hifi_count']['threads'] + resources: + memory=resource['meryl_hifi_count']['memory'], + time=resource['meryl_hifi_count']['time'], output: temp(directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/03_merylDb/" + "{readCounter}" + "_hifi_dB.{smrtornot}.{kmer}.meryl"))), log: @@ -181,7 +196,10 @@ rule meryl_hifi_build: params: kmer = "{kmer}" threads: - 16 + resource['meryl_hifi_build']['threads'] + resources: + memory=resource['meryl_hifi_build']['memory'], + time=resource['meryl_hifi_build']['time'], output: directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/03_merylDb/complete_hifi_{sample}_dB.{smrtornot}.{kmer}.meryl")), log: diff --git a/rules/illumina_05_11_21.smk b/rules/illumina_05_11_21.smk index 4ab32dcebebd1ba475ee84521145444a7f350185..4c58a1f27503368095d656dc75a1d9acc70b5443 100644 --- a/rules/illumina_05_11_21.smk +++ b/rules/illumina_05_11_21.smk @@ -24,6 +24,12 @@ # # +localrules: symlinkUnzippedFastq_R1, symlinkUnzippedFastq_R2, symLink_trim10xbarcodes_notrimAdapt, symlinks_no10xwithAdaptTrim, 
symlinks_no10xOrAdaptTrim, symlink_trim10xbarcodesR2, multiqc_hifi + + + + + def R1_gzipped(wildcards): return yesGzip_R1.loc[(wildcards.sample, wildcards.readCounter), "Library_R1"] @@ -40,13 +46,16 @@ rule unzipFastq_R1: input: assembly=R1_gzipped, output: - os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.{trim10x}.{trimAdapters}_R1.fastq"), + temp(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.{trim10x}.{trimAdapters}_R1.fastq")), log: - temp(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.{trim10x}.{trimAdapters}_R1_pigzUnzip.log")), + os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.{trim10x}.{trimAdapters}_R1_pigzUnzip.log"), conda: "../envs/pigz.yaml" threads: - 4 + resource['unzipFastq_R1']['threads'] + resources: + memory=resource['unzipFastq_R1']['memory'], + time=resource['unzipFastq_R1']['time'] shell: """ pigz -p {threads} -c -d -k {input.assembly} > {output} 2> {log} @@ -57,8 +66,8 @@ rule symlinkUnzippedFastq_R1: assembly=R1_notgzipped, output: temp(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.{trim10x}.{trimAdapters}_R1.fastq")), - threads: - 1 + # threads: + # 1 shell: """ ln -s {input} {output} @@ -68,13 +77,16 @@ rule unzipFastq_R2: input: assembly=R2_gzipped, output: - temp(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.{trim10x}.{trimAdapters}_R2.fastq")), + os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.{trim10x}.{trimAdapters}_R2.fastq"), log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.{trim10x}.{trimAdapters}_R2_pigzUnzip.log") conda: "../envs/pigz.yaml" threads: - 4 + resource['unzipFastq_R2']['threads'] + resources: + 
memory=resource['unzipFastq_R2']['memory'], + time=resource['unzipFastq_R2']['time'] shell: """ pigz -p {threads} -c -d -k {input.assembly} > {output} 2> {log} @@ -85,8 +97,8 @@ rule symlinkUnzippedFastq_R2: assembly=R2_notgzipped, output: temp(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.{trim10x}.{trimAdapters}_R2.fastq")), - threads: - 1 + # threads: + # 1 shell: """ ln -s {input} {output} @@ -101,7 +113,10 @@ rule trim10xbarcodes: read1=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.10xTrimmed.{trimAdapters}_Read1.fastq")), # read2=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.10xTrimmed.{trimAdapters}_Read2.fastq")) threads: - 4 + resource['trim10xbarcodes']['threads'] + resources: + memory=resource['trim10xbarcodes']['memory'], + time=resource['trim10xbarcodes']['time'] log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.10xTrimmed.10BarcodeRemoval_Trimmomatic.{trimAdapters}.log") conda: @@ -118,8 +133,8 @@ rule symlink_trim10xbarcodesR2: read2=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.10xTrimmed.{trimAdapters}_R2.fastq") output: read2=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.10xTrimmed.{trimAdapters}_Read2.fastq")) - threads: - 1 + # threads: + # 1 conda: "../envs/pigz.yaml" shell: @@ -132,12 +147,12 @@ rule symlink_trim10xbarcodesR2: rule symLink_trim10xbarcodes_notrimAdapt: input: read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_Read1.fastq"), - read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_Read2.fastq") + 
read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_Read2.fastq"), output: read1=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_val_1.fq")), read2=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_val_2.fq")) - threads: - 1 + # threads: + # 1 shell: """ ln -s {input.read1} {output.read1} @@ -151,8 +166,8 @@ rule symlinks_no10xOrAdaptTrim: output: read1=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.not10xTrimmed.notAdaptTrimmed_val_1.fq")), read2=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.not10xTrimmed.notAdaptTrimmed_val_2.fq")) - threads: - 1 + # threads: + # 1 shell: """ ln -s {input.read1} {output.read1} @@ -166,8 +181,8 @@ rule symlinks_no10xwithAdaptTrim: output: read1=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.not10xTrimmed.AdaptTrimmed_Read1.fastq")), read2=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.not10xTrimmed.AdaptTrimmed_Read2.fastq")) - threads: - 1 + # threads: + # 1 shell: """ ln -s {input.read1} {output.read1} @@ -177,7 +192,7 @@ rule symlinks_no10xwithAdaptTrim: rule trimAdapters: input: read1= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_Read1.fastq"), - read2= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_Read2.fastq") + read2= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_Read2.fastq"), params: 
outputDir=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/"), r1_prefix="{readCounter}.{trim10x}.AdaptTrimmed", @@ -186,7 +201,10 @@ rule trimAdapters: read1=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_val_1.fq")), read2=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_val_2.fq")) threads: - 8 + resource['trimAdapters']['threads'] + resources: + memory=resource['trimAdapters']['memory'], + time=resource['trimAdapters']['time'], log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.{trim10x}.AdaptTrimmed_tGalore.log") conda: @@ -208,7 +226,10 @@ rule fastqc_Illumina: log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.{trim10x}.{trimAdapters}_fastqc.log") threads: - 4 + resource['fastqc_Illumina']['threads'] + resources: + memory=resource['fastqc_Illumina']['memory'], + time=resource['fastqc_Illumina']['time'], conda: "../envs/pigz.yaml" shell: @@ -226,8 +247,8 @@ rule multiqc_hifi: os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/05_multiqc/{sample}.{trim10x}.{trimAdapters}.multiqcReport.html") log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{sample}.{trim10x}.{trimAdapters}_multiqc.log") - threads: - 1 + # threads: + # 1 conda: "../envs/pigz.yaml" shell: @@ -242,7 +263,10 @@ rule meryl_R1: kmer = "{kmer}", # path= os.path.join(config['Results'], "{sample}" +"/2_QVstats_merylAndMerqury/") threads: - 12 + resource['meryl_R1']['threads'] + resources: + memory=resource['meryl_R1']['memory'], + time=resource['meryl_R1']['time'], output: temp(directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/03_merylDb/{readCounter}.{trim10x}.{trimAdapters}_R1.{kmer}.meryl"))) log: @@ -264,7 +288,10 @@ rule meryl_R2: 
kmer = "{kmer}", # path= os.path.join(config['Results'], "{sample}" +"/2_QVstats_merylAndMerqury/") threads: - 12 + resource['meryl_R2']['threads'] + resources: + memory=resource['meryl_R2']['memory'], + time=resource['meryl_R2']['time'], output: temp(directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/03_merylDb/{readCounter}.{trim10x}.{trimAdapters}_R2.{kmer}.meryl"))) log: @@ -291,7 +318,10 @@ rule meryl_illumina_build: kmer = "{kmer}", path= os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/03_merylDb/") threads: - 12 + resource['meryl_illumina_build']['threads'] + resources: + memory=resource['meryl_illumina_build']['memory'], + time=resource['meryl_illumina_build']['time'], output: directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/03_merylDb/{sample}_illuminaDb.{trim10x}.{trimAdapters}.{kmer}.meryl")), log: @@ -302,5 +332,6 @@ rule meryl_illumina_build: "../envs/merylMerq_2.yaml" shell: """ + export OMP_NUM_THREADS={threads} (meryl union-sum {input} output {output}) &> {log} """ diff --git a/rules/run_05_11_21.smk b/rules/run_05_11_21.smk index e73789fc112ad7ed47d3fa544a6883c59eda555c..a91a08c512d17ddc25004bee8c6d39c7fcde6831 100644 --- a/rules/run_05_11_21.smk +++ b/rules/run_05_11_21.smk @@ -66,8 +66,7 @@ def merylDB(wildcards): # return newList - - +localrules: symlinkUnzippedHiC_R1, symlinkUnzippedHiC_R2, symlinkUnzippedFasta_PRI, symlinkUnzippedFasta_ALT, moveBuscoOutputs, saveConfiguration_and_getKeyValues_kmer, saveConfiguration_and_getKeyValues, aggregateAllAssemblies, makeReport, pretextMaps2md, addFullTable, aggregateReport def HiC_R1_gzipped(wildcards): return yesGzip_HiC_R1.loc[(wildcards.asmID), "HiC_R1"] @@ -82,7 +81,10 @@ rule unzipHiC_R1: conda: "../envs/pigz.yaml" threads: - 4 + resource['unzipHiC_R1']['threads'] + resources: + memory=resource['unzipHiC_R1']['memory'], + time=resource['unzipHiC_R1']['time'], shell: """ pigz -p {threads} -c -d -k 
{input.assembly} > {output} 2> {log} @@ -96,8 +98,8 @@ rule symlinkUnzippedHiC_R1: assembly=HiC_R1_unzipped, output: temp(os.path.join(config['Results'], "1_evaluation/{asmID}/07_pretext/{asmID}.HiC.R1.fastq")), - threads: - 1 + # threads: + # 1 shell: """ ln -s {input} {output} @@ -117,7 +119,10 @@ rule unzipHiC_R2: conda: "../envs/pigz.yaml" threads: - 4 + resource['unzipHiC_R2']['threads'] + resources: + memory=resource['unzipHiC_R2']['memory'], + time=resource['unzipHiC_R2']['time'] shell: """ pigz -p {threads} -c -d -k {input.assembly} > {output} 2> {log} @@ -131,8 +136,8 @@ rule symlinkUnzippedHiC_R2: assembly=HiC_R2_unzipped, output: temp(os.path.join(config['Results'], "1_evaluation/{asmID}/07_pretext/{asmID}.HiC.R2.fastq")), - threads: - 1 + # threads: + # 1 shell: """ ln -s {input} {output} @@ -156,7 +161,11 @@ rule pretext_index_PRI_asm: os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.PRI.indexing.log") conda: "../envs/pretext.yaml" - threads: 1 + threads: + resource['pretext_index_PRI_asm']['threads'] + resources: + memory=resource['pretext_index_PRI_asm']['memory'], + time=resource['pretext_index_PRI_asm']['time'] shell: """ (bwa-mem2 index {input.assemblyPRI}) &> {log} @@ -178,7 +187,10 @@ rule pretext_fastq2bam_R1: conda: "../envs/pretext.yaml" threads: - 12 + resource['pretext_fastq2bam_R1']['threads'] + resources: + memory=resource['pretext_fastq2bam_R1']['memory'], + time=resource['pretext_fastq2bam_R1']['time'] log: os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.fastq2bam.R1.log") shell: @@ -201,7 +213,10 @@ rule pretext_fastq2bam_R2: conda: "../envs/pretext.yaml" threads: - 12 + resource['pretext_fastq2bam_R2']['threads'] + resources: + memory=resource['pretext_fastq2bam_R2']['memory'], + time=resource['pretext_fastq2bam_R2']['time'] log: os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.fastq2bam.R2.log") shell: @@ -226,7 +241,10 @@ rule pretext_filter_5primeEnd_R1: conda: 
"../envs/pretext.yaml" threads: - 12 + resource['pretext_filter_5primeEnd_R1']['threads'] + resources: + memory=resource['pretext_filter_5primeEnd_R1']['memory'], + time=resource['pretext_filter_5primeEnd_R1']['time'] log: os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.filter5end.R1.log") shell: @@ -251,7 +269,10 @@ rule pretext_filter_5primeEnd_R2: conda: "../envs/pretext.yaml" threads: - 12 + resource['pretext_filter_5primeEnd_R2']['threads'] + resources: + memory=resource['pretext_filter_5primeEnd_R2']['memory'], + time=resource['pretext_filter_5primeEnd_R2']['time'] log: os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.filter5end.R2.log") shell: @@ -271,7 +292,10 @@ rule pretext_filtered_combine: conda: "../envs/pretext.yaml" threads: - 12 + resource['pretext_filtered_combine']['threads'] + resources: + memory=resource['pretext_filtered_combine']['memory'], + time=resource['pretext_filtered_combine']['time'] log: os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.COMBINED.FILTERED.log") shell: @@ -287,7 +311,10 @@ rule pretext_map: conda: "../envs/pretext.yaml" threads: - 12 + resource['pretext_map']['threads'] + resources: + memory=resource['pretext_map']['memory'], + time=resource['pretext_map']['time'] log: os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.COMBINED.FILTERED.PretextMap.log") shell: @@ -306,7 +333,10 @@ rule pretext_snapshot: conda: "../envs/pretext.yaml" threads: - 1 + resource['pretext_snapshot']['threads'] + resources: + memory=resource['pretext_snapshot']['memory'], + time=resource['pretext_snapshot']['time'] log: os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.COMBINED.FILTERED.PretextSnapshot.log") shell: @@ -330,7 +360,10 @@ rule unzipFasta_PRI: conda: "../envs/pigz.yaml" threads: - 4 + resource['unzipFasta_PRI']['threads'] + resources: + memory=resource['unzipFasta_PRI']['memory'], + time=resource['unzipFasta_PRI']['time'] 
shell: """ pigz -p {threads} -c -d -k {input.assembly} > {output} 2> {log} @@ -344,8 +377,8 @@ rule symlinkUnzippedFasta_PRI: assembly=PRI_asm_unzipped, output: temp(os.path.join(config['Results'], "1_evaluation/{asmID}/01_unzipFastas/{asmID}.PRI.fasta")), - threads: - 1 + # threads: + # 1 shell: """ ln -s {input} {output} @@ -366,7 +399,10 @@ rule unzipFasta_ALT: conda: "../envs/pigz.yaml" threads: - 4 + resource['unzipFasta_ALT']['threads'] + resources: + memory=resource['unzipFasta_ALT']['memory'], + time=resource['unzipFasta_ALT']['time'] shell: """ pigz -p {threads} -c -d -k {input.assembly} > {output} 2> {log} @@ -381,8 +417,8 @@ rule symlinkUnzippedFasta_ALT: assembly=ALT_asm_unzipped, output: temp(os.path.join(config['Results'], "1_evaluation/{asmID}/01_unzipFastas/{asmID}.ALT.fasta")), - threads: - 1 + # threads: + # 1 shell: """ ln -s {input} {output} @@ -410,7 +446,10 @@ rule merqury: # symlink_fasta=os.path.join(config['Results'], "01_evaluation/{asmID}/01B_QV-and-kmerMultiplicity/{asmID}.fasta"), symlink_merylDB=directory(os.path.join(config['Results'], "1_evaluation/{asmID}/04_merquryQVandKAT/merylDB_providedFor_{asmID}.meryl")) threads: - 12 + resource['merqury']['threads'] + resources: + memory=resource['merqury']['memory'], + time=resource['merqury']['time'] output: os.path.join(config['Results'],"1_evaluation/{asmID}/04_merquryQVandKAT/{asmID}_merqOutput.qv"), os.path.join(config['Results'],"1_evaluation/{asmID}/04_merquryQVandKAT/{asmID}_merqOutput.completeness.stats"), @@ -448,7 +487,10 @@ rule busco5: assemblyName = "{asmID}", chngDir = os.path.join(config['Results'], "1_evaluation/{asmID}/05_BUSCO") threads: - 16 + resource['busco5']['threads'] + resources: + memory=resource['busco5']['memory'], + time=resource['busco5']['time'] output: # report(os.path.join(config['Results'], "{sample}" + "/busco5/" + "{sample}" + "/short_summary.specific." + config['busco5Lineage'] + "_odb10." 
+ "{sample}" + ".txt"), caption="../report/busco.rst", category="Benchmark Universal Single Copy Orthologs", subcategory="{fastq}") os.path.join(config['Results'], "1_evaluation/{asmID}/05_BUSCO/{asmID}/short_summary.specific." + buscoDataBaseName + "_odb10.{asmID}.txt"), @@ -484,8 +526,8 @@ rule moveBuscoOutputs: # blastDB= os.path.join(config['Results'], "{sample}" + "/busco5/blast_db") output: file = os.path.join(config['Results'], "1_evaluation/{asmID}/05_BUSCO/short_summary.specific." + buscoDataBaseName + "_odb10.{asmID}.txt"), - threads: - 1 + # threads: + # 1 shell: """ mv -t {params.mvRunBuscoDest} {params.logs} @@ -518,7 +560,10 @@ rule genomescope2: log: os.path.join(config['Results'],"1_evaluation/{asmID}/logs/{asmID}_k{kmer}_gscopelog.txt") threads: - 1 + resource['genomescope2']['threads'] + resources: + memory=resource['genomescope2']['memory'], + time=resource['genomescope2']['time'] shell: """ head -n 10000 {input.hist} > {params.cpHist} @@ -545,7 +590,10 @@ rule assemblyStats: filename="{asmID}", given_size=lambda wildcards: expand("{genomeSize}", genomeSize=testDict[wildcards.asmID][4]) threads: - 1 + resource['assemblyStats']['threads'] + resources: + memory=resource['assemblyStats']['memory'], + time=resource['assemblyStats']['time'] shell: """ python {params.script} {input.assembly} {input.estGenome} {params.filename} {params.given_size} {output.scaffStats} {output.contStats} @@ -569,8 +617,8 @@ rule saveConfiguration_and_getKeyValues_kmer: gscopeSum=os.path.join(config['Results'], "1_evaluation/{asmID}/06_keyResults/{asmID}_k{kmer}_summary.txt"), gscopeLog=os.path.join(config['Results'], "1_evaluation/{asmID}/06_keyResults/{asmID}_k{kmer}_log_plot.png"), gscopeLin=os.path.join(config['Results'], "1_evaluation/{asmID}/06_keyResults/{asmID}_k{kmer}_linear_plot.png") - threads: - 1 + # threads: + # 1 # os.path.join(config['Results'],"{sample}" + "/QVstats_merylAndMerqury/logs/" + "{sample}" + "_meryl.log"), # 
multiqc=os.path.join(config['Results'],"{sample}/5_31_Key_Results/{sample}_multiqc_report.html") # aggregateTsv=os.path.join(config['Results'],"{sample}"+ "/individual_aggregated_results/aggregate.tsv") @@ -719,8 +767,8 @@ rule saveConfiguration_and_getKeyValues: # os.path.join(config['Results'],"{sample}" + "/assemblyStats/{sample}_contig_stats.tsv"), scaffStats=os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_scaffold_stats.tsv"), contStats=os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_contig_stats.tsv"), - threads: - 1 + # threads: + # 1 # os.path.join(config['Results'],"{sample}" + "/QVstats_merylAndMerqury/logs/" + "{sample}" + "_meryl.log"), # multiqc=os.path.join(config['Results'],"{sample}/5_31_Key_Results/{sample}_multiqc_report.html") # aggregateTsv=os.path.join(config['Results'],"{sample}"+ "/individual_aggregated_results/aggregate.tsv") @@ -838,8 +886,8 @@ rule aggregateAllAssemblies: # rows=os.path.join(config['Results'],"allAssemblies_keyResults/aggregate_rows.tsv"), newSampleSheet=os.path.join(config['Results'],"1_evaluation/finalResults/savedSampleSheet.tsv"), newConfigFile=os.path.join(config['Results'],"1_evaluation/finalResults/savedConfig.yaml") - threads: - 1 + # threads: + # 1 shell: """ cp {input.sampleSheet} {output.newSampleSheet} @@ -862,8 +910,8 @@ rule makeReport: os.path.join(config['Results'], "1_evaluation/{asmID}/06_keyResults/only_buscoScores_{asmID}.txt"), os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_merqOutput.{asmID}.PRI.spectra-cn.fl.png"), os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_merqOutput.spectra-cn.fl.png") - threads: - 1 + # threads: + # 1 params: "{asmID}", "{kmer}", @@ -928,8 +976,8 @@ rule pretextMaps2md: IndividualPretextMD=os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_FullPRETEXTMarkdown.md") params: assemblyName='{asmID}' - threads: - 1 + # threads: + # 1 run: import 
shutil shutil.copyfile(input.PretextMap, output.pretextCP2keyResults) @@ -962,8 +1010,8 @@ rule addFullTable: results=os.path.join(config['Results'],"1_evaluation/finalResults/Combined_Results_FULLTABLE.tsv") output: os.path.join(config['Results'],"1_evaluation/finalResults/FullTableMarkdown.md") - threads: - 1 + # threads: + # 1 run: import pandas as pd from tabulate import tabulate @@ -1006,8 +1054,8 @@ rule aggregateReport: output: FullMarkdown=os.path.join(config['Results'],"1_evaluation/finalResults/FullMarkdown.md"), endTableMD=os.path.join(config['Results'],"1_evaluation/finalResults/FullTableMarkdown_wPreamble.md") - threads: - 1 + # threads: + # 1 shell: """ cat {input.landingPage} {input.indivMD} {input.IndividualPretextMD} >> {output.FullMarkdown} @@ -1026,7 +1074,10 @@ rule makePDF: log: os.path.join(config['Results'], "1_evaluation/logs/MAKEPDF_pandoc.log") threads: - 1 + resource['makePDF']['threads'] + resources: + memory=resource['makePDF']['memory'], + time=resource['makePDF']['time'] shell: """ (pandoc -o {output.pdf_report} {input.md_report} --pdf-engine=tectonic) &>> {log}