diff --git a/SUBMIT_CONFIG/slurm/cluster.yaml b/SUBMIT_CONFIG/slurm/cluster.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb3e45ed39a1d486b6ac0682de054e5293db3d41 --- /dev/null +++ b/SUBMIT_CONFIG/slurm/cluster.yaml @@ -0,0 +1,105 @@ +__default__: + jobname: 'GEP.{rule}' + partition: begendiv + nCPUs: "{threads}" + qos: 'standard' + nodes: 1 + + +unzipFasta_PRI: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 500 + time: "00:00:05" + +symlinkUnzippedFasta_PRI: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 500 + time: "00:00:05" + + +unzipFasta_ALT: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 500 + time: "00:00:05" + + + +symlinkUnzippedFasta_ALT: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 500 + time: "00:00:05" + + + +busco5: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 100000 + time: "1-00:00:00" + + + +moveBuscoOutputs: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 500 + time: "00:00:05" + + + + +merqury: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 20000 + time: "08:00:00" + + + +genomescope2: + jobname: GEP.{rule}.{wildcards.asmID}.{wildcards.kmer} + memory: 500 + time: "00:00:05" + + +assemblyStats: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 500 + time: "00:00:05" + +saveConfiguration_and_getKeyValues_kmer: + jobname: GEP.{rule}.{wildcards.asmID}.{wildcards.kmer} + memory: 500 + time: "00:00:05" + + +saveConfiguration_and_getKeyValues: + jobname: GEP.{rule}.{wildcards.asmID} + memory: 500 + time: "00:00:05" + +aggregateAllAssemblies: + jobname: GEP.{rule} + memory: 500 + time: "00:00:05" + + +makeReport: + jobname: GEP.{rule}.{wildcards.asmID}.{wildcards.kmer} + memory: 500 + time: "00:00:05" + + + +addFullTable: + jobname: GEP.{rule} + memory: 500 + time: "00:00:05" + +aggregateReport: + jobname: GEP.{rule} + memory: 500 + time: "00:00:05" + + +makePDF: + jobname: GEP.{rule} + memory: 500 + time: "00:00:05" diff --git a/SUBMIT_CONFIG/slurm/config.yaml b/SUBMIT_CONFIG/slurm/config.yaml index 
3774306c444ad2e8241eb03327d46c44d636201d..07b01f5a0b325352add3c85055df969ada221add 100644 --- a/SUBMIT_CONFIG/slurm/config.yaml +++ b/SUBMIT_CONFIG/slurm/config.yaml @@ -1,27 +1,31 @@ + +cluster-config: "cluster.yaml" + cluster: - mkdir -p logs/{rule} && + mkdir -p slurm_logs/{rule} && sbatch - --partition={resources.partition} - --qos={resources.qos} - --cpus-per-task={threads} - --mem={resources.mem_mb} - --job-name=smk-{rule}-{wildcards} - --output=logs/{rule}/{rule}-{wildcards}-%j.out - --time={resources.time} - --nodes={resources.nodes} + --partition={cluster.partition} + --qos={cluster.qos} + --cpus-per-task={cluster.nCPUs} + --mem={cluster.memory} + --job-name={cluster.jobname} + --output=slurm_logs/{rule}/{rule}-{wildcards}-%j.out + --time={cluster.time} + --nodes={cluster.nodes} #User Defines below parameters -default-resources: - - partition=begendiv - - qos=standard - - mem_mb=100000 - - time="3-00:00:00" - - nodes=1 +# default-resources: +# - partition=begendiv +# - qos=standard +# - mem_mb=100000 +# - time="3-00:00:00" +# - nodes=1 + + restart-times: 3 max-jobs-per-second: 100 max-status-checks-per-second: 10 -local-cores: 1 latency-wait: 60 jobs: 500 keep-going: False diff --git a/configuration/exampleSampleSheets/build_hifi_example.tsv b/configuration/exampleSampleSheets/build_hifi_example.tsv index 9373621493faf2d27d203941945d2a9d1c45abf9..8e9deade4be85207f28aa80e4ea2ae6a76659947 100644 --- a/configuration/exampleSampleSheets/build_hifi_example.tsv +++ b/configuration/exampleSampleSheets/build_hifi_example.tsv @@ -1,6 +1,3 @@ -sample hifi_reads meryl_kmer_size trimSMRTbell fastQC -ilLasFlex1.1 /srv/public/users/james94/data/ilLasFlex1.1/ERR6565937.fastq 21 True True -ilVanAtal1.2 /srv/public/users/james94/data/ilVanAtal1.2/ERR6608650.fastq 21 False True -ilCyaSemi1.1 /srv/public/users/james94/data/ilCyaSemi1.1/ERR6560795.fastq 21 False True -idScaPyra1.1 /srv/public/users/james94/data/idScaPyra1.1/ERR7057609.fastq 21 True True -idScaPyra1.1 
/srv/public/users/james94/data/idScaPyra1.1/ERR7057610.fastq 21 True True +sample hifi_reads meryl_kmer_size trimSMRTbell fastQC +organismX /<pathto>/organismX_Hifi_Library1.fq 21 False True +organismX /<pathto>/organismX_Hifi_Library2.fq 21 False True diff --git a/configuration/exampleSampleSheets/build_illumina_example.tsv b/configuration/exampleSampleSheets/build_illumina_example.tsv index 3cd60f812c317a5a3727c579e718fcce24ec8eab..f102e2c80836467b695eafd2371fda751f20885b 100644 --- a/configuration/exampleSampleSheets/build_illumina_example.tsv +++ b/configuration/exampleSampleSheets/build_illumina_example.tsv @@ -1,7 +1,6 @@ -sample Library_R1 Library_R2 meryl_kmer_size trim10X trimAdapters fastQC -ilVanAtal1.2 /srv/public/users/james94/data/ilVanAtal1.2/30996_8_1_R1.trimmed.fq /srv/public/users/james94/data/ilVanAtal1.2/30996_8_1_R2.trimmed.fq 21 False False True -ilVanAtal1.2 /srv/public/users/james94/data/ilVanAtal1.2/30996_8_3_R1.trimmed.fq /srv/public/users/james94/data/ilVanAtal1.2/30996_8_3_R2.trimmed.fq 21 False False True -ilLasFlex1.1 /srv/public/users/james94/data/ilLasFlex1.1/ERR6002656_1.fastq.gz /srv/public/users/james94/data/ilLasFlex1.1/ERR6002656_2.fastq.gz 21 True True True -ilLasFlex1.1 /srv/public/users/james94/data/ilLasFlex1.1/ERR6003042_1.fastq /srv/public/users/james94/data/ilLasFlex1.1/ERR6003042_2.fastq 21 True True True -ilCyaSemi1.1 /srv/public/users/james94/data/ilCyaSemi1.1/ERR6002620_1.fastq.gz /srv/public/users/james94/data/ilCyaSemi1.1/ERR6002620_2.fastq.gz 21 True True True -ilCyaSemi1.1 /srv/public/users/james94/data/ilCyaSemi1.1/ERR6002621_1.fastq.gz /srv/public/users/james94/data/ilCyaSemi1.1/ERR6002621_2.fastq.gz 21 True True True +sample Library_R1 Library_R2 meryl_kmer_size trim10X trimAdapters fastQC +organismX /<pathto>/organismX_Library1_R1.fq /<pathto>/organismX_Library1_R2.fq 21 False False True +organismX /<pathto>/organismX_Library2_R1.fq /<pathto>/organismX_Library2_R2.fq 21 False False True +organismY 
/<pathto>/organismY_Library1_R1.fastq.gz /<pathto>/organismY_Library1_R2.fastq.gz 21 True True True +organismY /<pathto>/organismY_Library2_R1.fastq.gz /<pathto>/organismY_Library2_R2.fastq.gz 21 True True True +organismY /<pathto>/organismY_Library3_R1.fastq.gz /<pathto>/organismY_Library3_R2.fastq.gz 21 True True True diff --git a/configuration/exampleSampleSheets/runEval_example.tsv b/configuration/exampleSampleSheets/runEval_example.tsv index 11daf29456001015b2eb6cd587a4ae903a3414fd..6ae8666b7ac5b4310aa583c0aa5f19d96192a376 100644 --- a/configuration/exampleSampleSheets/runEval_example.tsv +++ b/configuration/exampleSampleSheets/runEval_example.tsv @@ -1,4 +1,4 @@ -ID PRI_asm ALT_asm merylDB merylDB_kmer genomeSize -ilVanAtal1.2_HifiDB /srv/public/users/james94/ilVanAtal1.2/GCA_905147765.2_ilVanAtal1.2_genomic.fna.gz None /srv/public/users/james94/ilVanAtal1.2/complete_hifi_ilVanAtal1.2_dB.smrtTrimmed.21.meryl 21 -ilLasFlex1.1_IllumDB /srv/public/users/james94/ilLasFlex1.1/GCA_905147015.1_ilLasFlex1.1_genomic.fna None /srv/public/users/james94/ilLasFlex1.1/complete_Illumina_ilLasFlex1.1_dB.10xTrimmed.AdaptTrimmed.21.meryl 21 -ilCyaSemi1.1_HifiDB /srv/public/users/james94/ilCyaSemi1.1/GCA_905187585.1_ilCyaSemi1.1_genomic.fna.gz None /srv/public/users/james94/ilCyaSemi1.1/complete_hifi_ilCyaSemi1.1_dB.smrtTrimmed.21.meryl 21 +ID PRI_asm ALT_asm merylDB merylDB_kmer genomeSize +speciesX_illumina /<pathto>/speciesX_assembly.fasta None /<pathto>/speciesX_illumina_database.21.meryl 21 +speciesX_HiFi /<pathto>/speciesX_assembly.fasta None /<pathto>/speciesX_hifi_database.31.meryl 31 +speciesY_illumina /<pathto>/speciesY_assembly_PrimaryHaplotype.fasta /<pathto>/speciesY_assembly_AlternateHaplotype.fasta /<pathto>/speciesY_illumina_database.21.meryl 21 1050000 diff --git a/rules/run_05_11_21.smk b/rules/run_05_11_21.smk index fd1975784e9e0c47ff33235ceafd8d2ae81ce086..a0728058afa3fdce087e5acffc8cfd76d8e23fe8 100644 --- a/rules/run_05_11_21.smk +++ 
b/rules/run_05_11_21.smk @@ -38,6 +38,8 @@ rule unzipFasta_PRI: os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.PRI.pigzUnzip.log") conda: "../envs/pigz.yaml" + threads: + 1 shell: """ pigz -c -d -k {input.assembly} > {output} 2> {log} @@ -48,6 +50,8 @@ rule symlinkUnzippedFasta_PRI: assembly=PRI_asm_unzipped, output: temp(os.path.join(config['Results'], "1_evaluation/{asmID}/01_unzipFastas/{asmID}.PRI.fasta")), + threads: + 1 shell: """ ln -s {input} {output} @@ -62,6 +66,8 @@ rule unzipFasta_ALT: os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.ALT.pigzUnzip.log") conda: "../envs/pigz.yaml" + threads: + 1 shell: """ pigz -c -d -k {input.assembly} > {output} 2> {log} @@ -72,6 +78,8 @@ rule symlinkUnzippedFasta_ALT: assembly=ALT_asm_unzipped, output: temp(os.path.join(config['Results'], "1_evaluation/{asmID}/01_unzipFastas/{asmID}.ALT.fasta")), + threads: + 1 shell: """ ln -s {input} {output} @@ -87,7 +95,7 @@ rule busco5: assemblyName = "{asmID}", chngDir = os.path.join(config['Results'], "1_evaluation/{asmID}/05_BUSCO") threads: - workflow.cores * 0.25 + 16 output: # report(os.path.join(config['Results'], "{sample}" + "/busco5/" + "{sample}" + "/short_summary.specific." + config['busco5Lineage'] + "_odb10." + "{sample}" + ".txt"), caption="../report/busco.rst", category="Benchmark Universal Single Copy Orthologs", subcategory="{fastq}") os.path.join(config['Results'], "1_evaluation/{asmID}/05_BUSCO/{asmID}/short_summary.specific." + buscoDataBaseName + "_odb10.{asmID}.txt"), @@ -123,6 +131,8 @@ rule moveBuscoOutputs: # blastDB= os.path.join(config['Results'], "{sample}" + "/busco5/blast_db") output: file = os.path.join(config['Results'], "1_evaluation/{asmID}/05_BUSCO/short_summary.specific." 
+ buscoDataBaseName + "_odb10.{asmID}.txt"), + threads: + 1 shell: """ mv -t {params.mvRunBuscoDest} {params.logs} @@ -149,7 +159,7 @@ rule merqury: # symlink_fasta=os.path.join(config['Results'], "01_evaluation/{asmID}/01B_QV-and-kmerMultiplicity/{asmID}.fasta"), symlink_merylDB=directory(os.path.join(config['Results'], "1_evaluation/{asmID}/04_merquryQVandKAT/merylDB_providedFor_{asmID}.meryl")) threads: - workflow.cores * 0.25 + 12 output: os.path.join(config['Results'],"1_evaluation/{asmID}/04_merquryQVandKAT/{asmID}_merqOutput.qv"), os.path.join(config['Results'],"1_evaluation/{asmID}/04_merquryQVandKAT/{asmID}_merqOutput.completeness.stats"), @@ -235,6 +245,8 @@ rule genomescope2: "../envs/genomescope.yaml" log: os.path.join(config['Results'],"1_evaluation/{asmID}/logs/{asmID}_k{kmer}_gscopelog.txt") + threads: + 1 shell: """ head -n 10000 {input.hist} > {params.cpHist} @@ -260,6 +272,8 @@ rule assemblyStats: path = os.path.join(config['Results'], "1_evaluation/{asmID}/02_assemblyStats/"), filename="{asmID}", given_size=lambda wildcards: expand("{genomeSize}", genomeSize=testDict[wildcards.asmID][4]) + threads: + 1 shell: """ python {params.script} {input.assembly} {input.estGenome} {params.filename} {params.given_size} {output.scaffStats} {output.contStats} @@ -299,6 +313,8 @@ rule saveConfiguration_and_getKeyValues_kmer: gscopeSum=os.path.join(config['Results'], "1_evaluation/{asmID}/06_keyResults/{asmID}_k{kmer}_summary.txt"), gscopeLog=os.path.join(config['Results'], "1_evaluation/{asmID}/06_keyResults/{asmID}_k{kmer}_log_plot.png"), gscopeLin=os.path.join(config['Results'], "1_evaluation/{asmID}/06_keyResults/{asmID}_k{kmer}_linear_plot.png") + threads: + 1 # os.path.join(config['Results'],"{sample}" + "/QVstats_merylAndMerqury/logs/" + "{sample}" + "_meryl.log"), # multiqc=os.path.join(config['Results'],"{sample}/5_31_Key_Results/{sample}_multiqc_report.html") # aggregateTsv=os.path.join(config['Results'],"{sample}"+ 
"/individual_aggregated_results/aggregate.tsv") @@ -356,6 +372,8 @@ rule saveConfiguration_and_getKeyValues: # os.path.join(config['Results'],"{sample}" + "/assemblyStats/{sample}_contig_stats.tsv"), scaffStats=os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_scaffold_stats.tsv"), contStats=os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_contig_stats.tsv") + threads: + 1 # os.path.join(config['Results'],"{sample}" + "/QVstats_merylAndMerqury/logs/" + "{sample}" + "_meryl.log"), # multiqc=os.path.join(config['Results'],"{sample}/5_31_Key_Results/{sample}_multiqc_report.html") # aggregateTsv=os.path.join(config['Results'],"{sample}"+ "/individual_aggregated_results/aggregate.tsv") @@ -502,6 +520,8 @@ rule aggregateAllAssemblies: # rows=os.path.join(config['Results'],"allAssemblies_keyResults/aggregate_rows.tsv"), newSampleSheet=os.path.join(config['Results'],"1_evaluation/finalResults/savedSampleSheet.tsv"), newConfigFile=os.path.join(config['Results'],"1_evaluation/finalResults/savedConfig.yaml") + threads: + 1 shell: """ cp {input.sampleSheet} {output.newSampleSheet} @@ -524,7 +544,9 @@ rule makeReport: os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_merqOutput.completeness.stats"), os.path.join(config['Results'], "1_evaluation/{asmID}/06_keyResults/only_buscoScores_{asmID}.txt"), os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_merqOutput.{asmID}.PRI.spectra-cn.fl.png"), - os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_merqOutput.{asmID}.PRI.spectra-cn.st.png") + os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_merqOutput.spectra-cn.fl.png") + threads: + 1 params: "{asmID}", "{kmer}", @@ -563,6 +585,8 @@ rule addFullTable: results=os.path.join(config['Results'],"1_evaluation/finalResults/Combined_Results_FULLTABLE.tsv") output: os.path.join(config['Results'],"1_evaluation/finalResults/FullTableMarkdown.md") 
+ threads: + 1 run: import pandas as pd from tabulate import tabulate @@ -582,6 +606,8 @@ rule aggregateReport: endTableMD=os.path.join(config['Results'],"1_evaluation/finalResults/FullTableMarkdown.md") output: os.path.join(config['Results'],"1_evaluation/finalResults/FullMarkdown.md") + threads: + 1 shell: """ cat {input.landingPage} {input.indivMD} {input.endTableMD} >> {output} @@ -597,6 +623,8 @@ rule makePDF: os.path.join(config['Results'],"1_evaluation/finalResults/FullMarkdown.md") output: os.path.join(config['Results'],"1_evaluation/finalResults/FULL_Report_PDF.pdf") + threads: + 1 shell: """ pandoc -o {output} {input} diff --git a/scripts/makePDF_indivMD.py b/scripts/makePDF_indivMD.py index f295419b11dbcfea8fae67313dc27d12e8aa9773..f7aa89d6e4e1c395f30362f698e7cf837c01ab79 100644 --- a/scripts/makePDF_indivMD.py +++ b/scripts/makePDF_indivMD.py @@ -81,7 +81,7 @@ with open(snakemake.output[0], 'w') as outFile: print("### BUSCOv5 (database: ", params_buscoDB, ")", file=outFile) print("```", file=outFile) for line in lines: - print(line, file=outFile) + print(line, file=outFile) print("```", file=outFile) print("\\", file=outFile) print("", file=outFile) @@ -89,7 +89,7 @@ with open(snakemake.output[0], 'w') as outFile: print("{ width=38% }", file=outFile) print("\\", file=outFile) print("", file=outFile) - print("### K-mer Multiplicity PRI and ALT (Flattened)", file=outFile) + print("### K-mer Multiplicity PRI (Stacked)", file=outFile) print("{ width=38% }", file=outFile) print("\\", file=outFile) print("\\pagebreak", file=outFile)