diff --git a/SUBMIT_CONFIG/slurm/cluster.yaml b/SUBMIT_CONFIG/slurm/cluster.yaml index 42cab37fdae39abc715a26e813eafc73891dbd73..5019c6644e774fa53bc50ead8c6d493e298463b3 100644 --- a/SUBMIT_CONFIG/slurm/cluster.yaml +++ b/SUBMIT_CONFIG/slurm/cluster.yaml @@ -340,7 +340,7 @@ nanoplot_hifi: longQC_hifi: jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.smrtornot} - mem_mb: 12000 + mem_mb: 96000 time: "04:00:05" multiqc_hifi: diff --git a/Snakefile b/Snakefile index 8facab1a486aa77c36de36c112cd96027312ba58..4c6e19c3cd762f1f2c45b2be7d1a1acde4ed2ee7 100644 --- a/Snakefile +++ b/Snakefile @@ -139,7 +139,7 @@ elif set(['sample', 'hifi_reads', 'meryl_kmer_size','trimSMRTbell', 'fastQC']).i samples=samples.set_index(['sample','readCounter']) longqc_dir = "tools/LongQC" - + sequencing_type = "pb-sequel" if config.get("use_pacbio", True) else "ont-rapid" ruleAllQCFiles=[] ### TODO change this column in a generic QC analysis. (both nanoplot and and longqc running) diff --git a/configuration/config.yaml b/configuration/config.yaml index a57dadd97e844e96c029e5a5f7b5e269105bc35a..c681466efa94c20d8e5753f2cc76f8ba53952187 100644 --- a/configuration/config.yaml +++ b/configuration/config.yaml @@ -1,13 +1,16 @@ ### PATH TO WHERE YOU WANT YOUR RESULTS FOR THIS RUN STORED - WILL BE AUTOMATICALLY CREATED IF IT DOESN'T EXIST ### -Results: "/home/valentina/Desktop/test_gep/results_test_new" +Results: "/home/valentina/Desktop/test_gep/results_test_new_11" ### FULL PATH TO YOUR SAMPLESHEET ### -samplesTSV: "/home/valentina/Desktop/test_gep/build_hifi_example.tsv" +samplesTSV: "/home/valentina/Desktop/test_gep/build_nano_example.tsv" ### PATH TO DEFINE RESOURCES FILE - DO NOT CHANGE UNLESS YOU WISH TO USE IN A DIFFERENT LOCATION #### resources: "configuration/define_resources.yaml" -### With EAR=True the ERGA Assembly Report is automatically generated from the analysis -EAR: False smudgeplot: False kmer_plot_flat: False +use_pacbio: True +run_longqc: True + +### With EAR=True 
the ERGA Assembly Report is automatically generated from the analysis +EAR: False diff --git a/configuration/define_resources.yaml b/configuration/define_resources.yaml index 4383973ccbc36fbb9f4f7f509d186158d682911d..f72dbe865397f682b38072b6a8a20208487f9cf7 100644 --- a/configuration/define_resources.yaml +++ b/configuration/define_resources.yaml @@ -186,3 +186,8 @@ merylUnion_hifi: mem_mb: 96000 time: "12:00:00" threads: 12 + +install_longqc: + mem_mb: 4000 + time: "01:00:00" + threads: 1 diff --git a/rules/build_hifi.smk b/rules/build_hifi.smk index e3e9aaa5910ba20456df060b4dc56305cb35409d..f9508ab7a3fed3aa298e2e4de708fdb913d38120 100644 --- a/rules/build_hifi.smk +++ b/rules/build_hifi.smk @@ -69,7 +69,7 @@ rule trimSMRTBellAdapters_hifi: log: os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/trimSMRTbell.{readCounter}.log") priority: - 15 + 2 conda: os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") shell: @@ -110,11 +110,18 @@ rule fastQC_hifi: (fastqc {input} -o {params.folder2out} -t {threads}) &> {log} """ +def longqc_input(wildcards): + # Input function: returns the LongQC report for wildcards.sample, or the NanoPlot report when the user disables LongQC (run_longqc: False); the path is built from wildcards.sample so it contains no unresolved {sample} placeholder + if config.get("run_longqc", True): + return os.path.join(config['Results'],"0_buildDatabases",wildcards.sample,"hifiReads/QC/longQC/web_summary.html") + else: + return os.path.join(config['Results'],"0_buildDatabases",wildcards.sample,"hifiReads/QC/nanoplot/NanoPlot-report.html") + rule multiQC_hifi: input: lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/fastqc/{readCounter}.{smrtornot}_fastqc.html"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], smrtornot=dictSamples[wildcards.sample][1]), os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/nanoplot/NanoPlot-report.html"), - os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/longQC/web_summary.html") + longqc_input params: 
folder2qc=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/fastqc/"), folder2qc2=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/nanoplot/"), @@ -145,7 +152,7 @@ rule merylCount_hifi: log: os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/meryl_hifi_count.{readCounter}.{kmer}.{smrtornot}.log") priority: - 10 + 1 conda: os.path.join(workflow.basedir, "envs/MER_STATS.yaml") shell: @@ -155,7 +162,9 @@ rule merylCount_hifi: rule merylUnion_hifi: input: - lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/merylDb/{readCounter}_hifi_dB.{smrtornot}.{kmer}.meryl/"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], kmer=dictSamples[wildcards.sample][0], smrtornot=dictSamples[wildcards.sample][1]) + db = lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/merylDb/{readCounter}_hifi_dB.{smrtornot}.{kmer}.meryl/"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], kmer=dictSamples[wildcards.sample][0], smrtornot=dictSamples[wildcards.sample][1]), + output_nanoplot = os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/nanoplot/NanoPlot-report.html"), + output_longqc = longqc_input params: kmer = "{kmer}", removeReadDIR_trimmed=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_trimReads/"), @@ -170,12 +179,12 @@ rule merylUnion_hifi: log: os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/meryl_hifi_combine.{sample}.{kmer}.log") priority: - 10 + 1 conda: os.path.join(workflow.basedir, "envs/MER_STATS.yaml") shell: """ - (meryl union-sum {input} output {output}) &> {log} + (meryl union-sum {input.db} output {output}) &> {log} rm -r {params.removeReadDIR_trimmed} rm -r {params.removeReadDIR_unzipped} """ @@ -194,6 +203,8 @@ rule NanoPlot: time=resource['nanoplot_hifi']['time'], log: os.path.join(config['Results'], 
"0_buildDatabases/{sample}/hifiReads/logs/nanoplot.log") + priority: + 2 conda: os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") shell: @@ -206,7 +217,8 @@ rule longQC: fastq = lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_trimReads/{readCounter}.{smrtornot}.fastq"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], smrtornot=dictSamples[wildcards.sample][1]), path_lonqqc = os.path.join(longqc_dir,"longQC.py") params: - folder2out=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/longQC/") + folder2out=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/longQC/"), + sequencing_type = sequencing_type output: os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/longQC/web_summary.html") threads: @@ -216,13 +228,14 @@ rule longQC: time=resource['longQC_hifi']['time'], log: os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/longQC.log") + priority: + 2 conda: os.path.join(workflow.basedir, "envs/LONGQC.yaml") shell: """ - ## TODO making either pacbio or nanopore for -x - rmdir {params.folder2out} - (python tools/LongQC/longQC.py sampleqc -x pb-sequel -o {params.folder2out} {input.fastq}) &> {log} + rm -rf {params.folder2out} + (python {input.path_lonqqc} sampleqc -x {params.sequencing_type} -o {params.folder2out} {input.fastq}) &> {log} """ rule install_longqc: