Skip to content
Snippets Groups Projects
Commit bd5ee96d authored by james94
Browse files

slurm optimisation

parent dc1d4023
No related branches found
No related tags found
No related merge requests found
# Per-rule SLURM settings for the evaluation workflow.
#
# Each value is substituted into the sbatch command line through the matching
# `{cluster.<key>}` placeholder: partition, qos, nCPUs, memory, jobname, time
# and nodes. Rule-specific sections only list the keys that differ from
# `__default__`.
#
# `memory` feeds `sbatch --mem` (megabytes unless a unit suffix is given) and
# `time` feeds `sbatch --time`.
# NOTE(review): "00:00:05" asks for five seconds; Slurm documents a one-minute
# time resolution with seconds rounded up -- confirm this is the intended
# minimum for the lightweight rules.

# Values applied to every rule unless a rule-specific section overrides them.
__default__:
    jobname: 'GEP.{rule}'
    partition: begendiv
    nCPUs: "{threads}"  # forwards the rule's own `threads:` value
    qos: 'standard'
    nodes: 1

# --- Assembly preparation ---------------------------------------------------
unzipFasta_PRI:
    jobname: GEP.{rule}.{wildcards.asmID}
    memory: 500
    time: "00:00:05"

symlinkUnzippedFasta_PRI:
    jobname: GEP.{rule}.{wildcards.asmID}
    memory: 500
    time: "00:00:05"

unzipFasta_ALT:
    jobname: GEP.{rule}.{wildcards.asmID}
    memory: 500
    time: "00:00:05"

symlinkUnzippedFasta_ALT:
    jobname: GEP.{rule}.{wildcards.asmID}
    memory: 500
    time: "00:00:05"

# --- Heavy evaluation steps (the only rules with large memory/time) ---------
busco5:
    jobname: GEP.{rule}.{wildcards.asmID}
    memory: 100000
    time: "1-00:00:00"

moveBuscoOutputs:
    jobname: GEP.{rule}.{wildcards.asmID}
    memory: 500
    time: "00:00:05"

merqury:
    jobname: GEP.{rule}.{wildcards.asmID}
    memory: 20000
    time: "08:00:00"

# --- Per-assembly statistics and key results --------------------------------
genomescope2:
    jobname: GEP.{rule}.{wildcards.asmID}.{wildcards.kmer}
    memory: 500
    time: "00:00:05"

assemblyStats:
    jobname: GEP.{rule}.{wildcards.asmID}
    memory: 500
    time: "00:00:05"

saveConfiguration_and_getKeyValues_kmer:
    jobname: GEP.{rule}.{wildcards.asmID}.{wildcards.kmer}
    memory: 500
    time: "00:00:05"

saveConfiguration_and_getKeyValues:
    jobname: GEP.{rule}.{wildcards.asmID}
    memory: 500
    time: "00:00:05"

# --- Aggregation and reporting ----------------------------------------------
# Rules without an {asmID} wildcard keep the plain 'GEP.{rule}' job name.
aggregateAllAssemblies:
    jobname: GEP.{rule}
    memory: 500
    time: "00:00:05"

makeReport:
    jobname: GEP.{rule}.{wildcards.asmID}.{wildcards.kmer}
    memory: 500
    time: "00:00:05"

addFullTable:
    jobname: GEP.{rule}
    memory: 500
    time: "00:00:05"

aggregateReport:
    jobname: GEP.{rule}
    memory: 500
    time: "00:00:05"

makePDF:
    jobname: GEP.{rule}
    memory: 500
    time: "00:00:05"
cluster-config: "cluster.yaml"
cluster:
mkdir -p logs/{rule} &&
mkdir -p slurm_logs/{rule} &&
sbatch
--partition={resources.partition}
--qos={resources.qos}
--cpus-per-task={threads}
--mem={resources.mem_mb}
--job-name=smk-{rule}-{wildcards}
--output=logs/{rule}/{rule}-{wildcards}-%j.out
--time={resources.time}
--nodes={resources.nodes}
--partition={cluster.partition}
--qos={cluster.qos}
--cpus-per-task={cluster.nCPUs}
--mem={cluster.memory}
--job-name={cluster.jobname}
--output=slurm_logs/{rule}/{rule}-{wildcards}-%j.out
--time={cluster.time}
--nodes={cluster.nodes}
#User Defines below parameters
default-resources:
- partition=begendiv
- qos=standard
- mem_mb=100000
- time="3-00:00:00"
- nodes=1
# default-resources:
# - partition=begendiv
# - qos=standard
# - mem_mb=100000
# - time="3-00:00:00"
# - nodes=1
restart-times: 3
max-jobs-per-second: 100
max-status-checks-per-second: 10
local-cores: 1
latency-wait: 60
jobs: 500
keep-going: False
......
sample hifi_reads meryl_kmer_size trimSMRTbell fastQC
ilLasFlex1.1 /srv/public/users/james94/data/ilLasFlex1.1/ERR6565937.fastq 21 True True
ilVanAtal1.2 /srv/public/users/james94/data/ilVanAtal1.2/ERR6608650.fastq 21 False True
ilCyaSemi1.1 /srv/public/users/james94/data/ilCyaSemi1.1/ERR6560795.fastq 21 False True
idScaPyra1.1 /srv/public/users/james94/data/idScaPyra1.1/ERR7057609.fastq 21 True True
idScaPyra1.1 /srv/public/users/james94/data/idScaPyra1.1/ERR7057610.fastq 21 True True
sample hifi_reads meryl_kmer_size trimSMRTbell fastQC
organismX /<pathto>/organismX_Hifi_Library1.fq 21 False True
organismX /<pathto>/organismX_Hifi_Library2.fq 21 False True
sample Library_R1 Library_R2 meryl_kmer_size trim10X trimAdapters fastQC
ilVanAtal1.2 /srv/public/users/james94/data/ilVanAtal1.2/30996_8_1_R1.trimmed.fq /srv/public/users/james94/data/ilVanAtal1.2/30996_8_1_R2.trimmed.fq 21 False False True
ilVanAtal1.2 /srv/public/users/james94/data/ilVanAtal1.2/30996_8_3_R1.trimmed.fq /srv/public/users/james94/data/ilVanAtal1.2/30996_8_3_R2.trimmed.fq 21 False False True
ilLasFlex1.1 /srv/public/users/james94/data/ilLasFlex1.1/ERR6002656_1.fastq.gz /srv/public/users/james94/data/ilLasFlex1.1/ERR6002656_2.fastq.gz 21 True True True
ilLasFlex1.1 /srv/public/users/james94/data/ilLasFlex1.1/ERR6003042_1.fastq /srv/public/users/james94/data/ilLasFlex1.1/ERR6003042_2.fastq 21 True True True
ilCyaSemi1.1 /srv/public/users/james94/data/ilCyaSemi1.1/ERR6002620_1.fastq.gz /srv/public/users/james94/data/ilCyaSemi1.1/ERR6002620_2.fastq.gz 21 True True True
ilCyaSemi1.1 /srv/public/users/james94/data/ilCyaSemi1.1/ERR6002621_1.fastq.gz /srv/public/users/james94/data/ilCyaSemi1.1/ERR6002621_2.fastq.gz 21 True True True
sample Library_R1 Library_R2 meryl_kmer_size trim10X trimAdapters fastQC
organismX /<pathto>/organismX_Library1_R1.fq /<pathto>/organismX_Library1_R2.fq 21 False False True
organismX /<pathto>/organismX_Library2_R1.fq /<pathto>/organismX_Library2_R2.fq 21 False False True
organismY /<pathto>/organismY_Library1_R1.fastq.gz /<pathto>/organismY_Library1_R2.fastq.gz 21 True True True
organismY /<pathto>/organismY_Library2_R1.fastq.gz /<pathto>/organismY_Library2_R2.fastq.gz 21 True True True
organismY /<pathto>/organismY_Library3_R1.fastq.gz /<pathto>/organismY_Library3_R2.fastq.gz 21 True True True
ID PRI_asm ALT_asm merylDB merylDB_kmer genomeSize
ilVanAtal1.2_HifiDB /srv/public/users/james94/ilVanAtal1.2/GCA_905147765.2_ilVanAtal1.2_genomic.fna.gz None /srv/public/users/james94/ilVanAtal1.2/complete_hifi_ilVanAtal1.2_dB.smrtTrimmed.21.meryl 21
ilLasFlex1.1_IllumDB /srv/public/users/james94/ilLasFlex1.1/GCA_905147015.1_ilLasFlex1.1_genomic.fna None /srv/public/users/james94/ilLasFlex1.1/complete_Illumina_ilLasFlex1.1_dB.10xTrimmed.AdaptTrimmed.21.meryl 21
ilCyaSemi1.1_HifiDB /srv/public/users/james94/ilCyaSemi1.1/GCA_905187585.1_ilCyaSemi1.1_genomic.fna.gz None /srv/public/users/james94/ilCyaSemi1.1/complete_hifi_ilCyaSemi1.1_dB.smrtTrimmed.21.meryl 21
ID PRI_asm ALT_asm merylDB merylDB_kmer genomeSize
speciesX_illumina /<pathto>/speciesX_assembly.fasta None /<pathTo>/speciesX_illumina_database.21.meryl 21
speciesX_HiFi /<pathto>/speciesX_assembly.fasta None /<pathTo>/speciesX_hifi_database.31.meryl 31
speciesY_illumina /<pathto>/speciesY_assembly_PrimaryHaplotype.fasta /<pathto>/speciesY_assembly_AlternateHaplotype.fasta /<pathTo>/speciesy_illumina_database.21.meryl 21 1050000
......@@ -38,6 +38,8 @@ rule unzipFasta_PRI:
os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.PRI.pigzUnzip.log")
conda:
"../envs/pigz.yaml"
threads:
1
shell:
"""
pigz -c -d -k {input.assembly} > {output} 2> {log}
......@@ -48,6 +50,8 @@ rule symlinkUnzippedFasta_PRI:
assembly=PRI_asm_unzipped,
output:
temp(os.path.join(config['Results'], "1_evaluation/{asmID}/01_unzipFastas/{asmID}.PRI.fasta")),
threads:
1
shell:
"""
ln -s {input} {output}
......@@ -62,6 +66,8 @@ rule unzipFasta_ALT:
os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.ALT.pigzUnzip.log")
conda:
"../envs/pigz.yaml"
threads:
1
shell:
"""
pigz -c -d -k {input.assembly} > {output} 2> {log}
......@@ -72,6 +78,8 @@ rule symlinkUnzippedFasta_ALT:
assembly=ALT_asm_unzipped,
output:
temp(os.path.join(config['Results'], "1_evaluation/{asmID}/01_unzipFastas/{asmID}.ALT.fasta")),
threads:
1
shell:
"""
ln -s {input} {output}
......@@ -87,7 +95,7 @@ rule busco5:
assemblyName = "{asmID}",
chngDir = os.path.join(config['Results'], "1_evaluation/{asmID}/05_BUSCO")
threads:
workflow.cores * 0.25
16
output:
# report(os.path.join(config['Results'], "{sample}" + "/busco5/" + "{sample}" + "/short_summary.specific." + config['busco5Lineage'] + "_odb10." + "{sample}" + ".txt"), caption="../report/busco.rst", category="Benchmark Universal Single Copy Orthologs", subcategory="{fastq}")
os.path.join(config['Results'], "1_evaluation/{asmID}/05_BUSCO/{asmID}/short_summary.specific." + buscoDataBaseName + "_odb10.{asmID}.txt"),
......@@ -123,6 +131,8 @@ rule moveBuscoOutputs:
# blastDB= os.path.join(config['Results'], "{sample}" + "/busco5/blast_db")
output:
file = os.path.join(config['Results'], "1_evaluation/{asmID}/05_BUSCO/short_summary.specific." + buscoDataBaseName + "_odb10.{asmID}.txt"),
threads:
1
shell:
"""
mv -t {params.mvRunBuscoDest} {params.logs}
......@@ -149,7 +159,7 @@ rule merqury:
# symlink_fasta=os.path.join(config['Results'], "01_evaluation/{asmID}/01B_QV-and-kmerMultiplicity/{asmID}.fasta"),
symlink_merylDB=directory(os.path.join(config['Results'], "1_evaluation/{asmID}/04_merquryQVandKAT/merylDB_providedFor_{asmID}.meryl"))
threads:
workflow.cores * 0.25
12
output:
os.path.join(config['Results'],"1_evaluation/{asmID}/04_merquryQVandKAT/{asmID}_merqOutput.qv"),
os.path.join(config['Results'],"1_evaluation/{asmID}/04_merquryQVandKAT/{asmID}_merqOutput.completeness.stats"),
......@@ -235,6 +245,8 @@ rule genomescope2:
"../envs/genomescope.yaml"
log:
os.path.join(config['Results'],"1_evaluation/{asmID}/logs/{asmID}_k{kmer}_gscopelog.txt")
threads:
1
shell:
"""
head -n 10000 {input.hist} > {params.cpHist}
......@@ -260,6 +272,8 @@ rule assemblyStats:
path = os.path.join(config['Results'], "1_evaluation/{asmID}/02_assemblyStats/"),
filename="{asmID}",
given_size=lambda wildcards: expand("{genomeSize}", genomeSize=testDict[wildcards.asmID][4])
threads:
1
shell:
"""
python {params.script} {input.assembly} {input.estGenome} {params.filename} {params.given_size} {output.scaffStats} {output.contStats}
......@@ -299,6 +313,8 @@ rule saveConfiguration_and_getKeyValues_kmer:
gscopeSum=os.path.join(config['Results'], "1_evaluation/{asmID}/06_keyResults/{asmID}_k{kmer}_summary.txt"),
gscopeLog=os.path.join(config['Results'], "1_evaluation/{asmID}/06_keyResults/{asmID}_k{kmer}_log_plot.png"),
gscopeLin=os.path.join(config['Results'], "1_evaluation/{asmID}/06_keyResults/{asmID}_k{kmer}_linear_plot.png")
threads:
1
# os.path.join(config['Results'],"{sample}" + "/QVstats_merylAndMerqury/logs/" + "{sample}" + "_meryl.log"),
# multiqc=os.path.join(config['Results'],"{sample}/5_31_Key_Results/{sample}_multiqc_report.html")
# aggregateTsv=os.path.join(config['Results'],"{sample}"+ "/individual_aggregated_results/aggregate.tsv")
......@@ -356,6 +372,8 @@ rule saveConfiguration_and_getKeyValues:
# os.path.join(config['Results'],"{sample}" + "/assemblyStats/{sample}_contig_stats.tsv"),
scaffStats=os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_scaffold_stats.tsv"),
contStats=os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_contig_stats.tsv")
threads:
1
# os.path.join(config['Results'],"{sample}" + "/QVstats_merylAndMerqury/logs/" + "{sample}" + "_meryl.log"),
# multiqc=os.path.join(config['Results'],"{sample}/5_31_Key_Results/{sample}_multiqc_report.html")
# aggregateTsv=os.path.join(config['Results'],"{sample}"+ "/individual_aggregated_results/aggregate.tsv")
......@@ -502,6 +520,8 @@ rule aggregateAllAssemblies:
# rows=os.path.join(config['Results'],"allAssemblies_keyResults/aggregate_rows.tsv"),
newSampleSheet=os.path.join(config['Results'],"1_evaluation/finalResults/savedSampleSheet.tsv"),
newConfigFile=os.path.join(config['Results'],"1_evaluation/finalResults/savedConfig.yaml")
threads:
1
shell:
"""
cp {input.sampleSheet} {output.newSampleSheet}
......@@ -524,7 +544,9 @@ rule makeReport:
os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_merqOutput.completeness.stats"),
os.path.join(config['Results'], "1_evaluation/{asmID}/06_keyResults/only_buscoScores_{asmID}.txt"),
os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_merqOutput.{asmID}.PRI.spectra-cn.fl.png"),
os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_merqOutput.{asmID}.PRI.spectra-cn.st.png")
os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_merqOutput.spectra-cn.fl.png")
threads:
1
params:
"{asmID}",
"{kmer}",
......@@ -563,6 +585,8 @@ rule addFullTable:
results=os.path.join(config['Results'],"1_evaluation/finalResults/Combined_Results_FULLTABLE.tsv")
output:
os.path.join(config['Results'],"1_evaluation/finalResults/FullTableMarkdown.md")
threads:
1
run:
import pandas as pd
from tabulate import tabulate
......@@ -582,6 +606,8 @@ rule aggregateReport:
endTableMD=os.path.join(config['Results'],"1_evaluation/finalResults/FullTableMarkdown.md")
output:
os.path.join(config['Results'],"1_evaluation/finalResults/FullMarkdown.md")
threads:
1
shell:
"""
cat {input.landingPage} {input.indivMD} {input.endTableMD} >> {output}
......@@ -597,6 +623,8 @@ rule makePDF:
os.path.join(config['Results'],"1_evaluation/finalResults/FullMarkdown.md")
output:
os.path.join(config['Results'],"1_evaluation/finalResults/FULL_Report_PDF.pdf")
threads:
1
shell:
"""
pandoc -o {output} {input}
......
......@@ -81,7 +81,7 @@ with open(snakemake.output[0], 'w') as outFile:
print("### BUSCOv5 (database: ", params_buscoDB, ")", file=outFile)
print("```", file=outFile)
for line in lines:
print(line, file=outFile)
print(line, file=outFile)
print("```", file=outFile)
print("\\", file=outFile)
print("", file=outFile)
......@@ -89,7 +89,7 @@ with open(snakemake.output[0], 'w') as outFile:
print("![](", kmer_flat, "){ width=38% }", file=outFile)
print("\\", file=outFile)
print("", file=outFile)
print("### K-mer Multiplicity PRI and ALT (Flattened)", file=outFile)
print("### K-mer Multiplicity PRI (Stacked)", file=outFile)
print("![](", kmer_stacked, "){ width=38% }", file=outFile)
print("\\", file=outFile)
print("\\pagebreak", file=outFile)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment