diff --git a/rules/build_hifi.smk b/rules/build_hifi.smk index 6c616e58a4e259e71088011e4524aa41c7fb7493..7dca6234f94cd222dfb4c4e116daf402af4310aa 100644 --- a/rules/build_hifi.smk +++ b/rules/build_hifi.smk @@ -1,5 +1,7 @@ -localrules: symlinkUnzippedHifi, symlinkfornotSmartTrimmed, multiqc_hifi +localrules: symlink_UnzippedFastq_hifi, \ + symlink_noSMRTBellAdaptTrim_hifi, \ + multiQC_hifi @@ -20,62 +22,62 @@ def hifi_notgzipped(wildcards): -rule unzipHifi: +rule unzipFastq_hifi: input: fastq=hifi_gzipped, output: os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/temp_unzipFastqs/{readCounter}.{smrtornot}.fastq"), log: - os.path.join(config['Results'], "0_buildDatabases/{sample}/logs/hifiReads/{readCounter}.{smrtornot}_pigzUnzip.log") + os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/pigzUnzip.{readCounter}.{smrtornot}.log") conda: - os.path.join(workflow.basedir, "envs/pigz.yaml") + os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") threads: - resource['unzipHifi']['threads'] + resource['unzipFastq_hifi']['threads'] resources: - mem_mb=resource['unzipHifi']['mem_mb'], - time=resource['unzipHifi']['time'], + mem_mb=resource['unzipFastq_hifi']['mem_mb'], + time=resource['unzipFastq_hifi']['time'], shell: """ pigz -p {threads} -c -d -k {input.fastq} > {output} 2> {log} """ -rule symlinkUnzippedHifi: +rule symlink_UnzippedFastq_hifi: input: fastq=hifi_notgzipped, output: os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/temp_unzipFastqs/{readCounter}.{smrtornot}.fastq"), log: - os.path.join(config['Results'], "0_buildDatabases/{sample}/logs/hifiReads/{readCounter}.{smrtornot}_pigzUnzip.log") + os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/pigzUnzip.{readCounter}.{smrtornot}.log") container: None shell: """ ln -s {input.fastq} {output} - echo "{input.fastq} no gzipped. Symlink created in place of expected decompressed file." > {log} + echo "{input.fastq} not gzipped. 
Symlink created in place of expected decompressed file." > {log} """ -rule trimSMRTbell: +rule trimSMRTBellAdapters_hifi: input: os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/temp_unzipFastqs/{readCounter}.smrtTrimmed.fastq"), output: outputFile=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_trimReads/{readCounter}.smrtTrimmed.fastq") threads: - resource['trimSMRTbell']['threads'] + resource['trimSMRTBellAdapters_hifi']['threads'] resources: - mem_mb=resource['trimSMRTbell']['mem_mb'], - time=resource['trimSMRTbell']['time'], + mem_mb=resource['trimSMRTBellAdapters_hifi']['mem_mb'], + time=resource['trimSMRTBellAdapters_hifi']['time'], log: - os.path.join(config['Results'], "0_buildDatabases/{sample}/logs/hifiReads/{readCounter}_trimSMRTbell.log") + os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/trimSMRTbell.{readCounter}.log") priority: 15 conda: - os.path.join(workflow.basedir, "envs/pigz.yaml") + os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") shell: """ - (cutadapt -j {threads} -o {output.outputFile} {input} -b ATCTCTCTCAACAACAACAACGGAGGAGGAGGAAAAGAGAGAGAT -b ATCTCTCTCTTTTCCTCCTCCTCCGTTGTTGTTGTTGAGAGAGAT --discard-trimmed) &> {log} + (cutadapt -j {threads} -o {output.outputFile} {input} -b AAAAAAAAAAAAAAAAAATTAACGGAGGAGGAGGA --overlap 35 -b ATCTCTCTCTTTTCCTCCTCCTCCGTTGTTGTTGTTGAGAGAGAT --overlap 45 --revcomp -e 0.1 --discard-trimmed) &> {log} """ -rule symlinkfornotSmartTrimmed: +rule symlink_noSMRTBellAdaptTrim_hifi: input: os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/temp_unzipFastqs/{readCounter}.notsmrtTrimmed.fastq"), output: @@ -87,7 +89,7 @@ rule symlinkfornotSmartTrimmed: ln -s {input} {output.outputFile} """ -rule fastqc_hifi: +rule fastQC_hifi: input: os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_trimReads/{readCounter}.{smrtornot}.fastq") params: @@ -95,20 +97,20 @@ rule fastqc_hifi: output: 
os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/fastqc/{readCounter}.{smrtornot}_fastqc.html") threads: - resource['trimSMRTbell']['threads'] + resource['fastQC_hifi']['threads'] resources: - mem_mb=resource['fastqc_hifi']['mem_mb'], - time=resource['fastqc_hifi']['time'], + mem_mb=resource['fastQC_hifi']['mem_mb'], + time=resource['fastQC_hifi']['time'], log: - os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/{readCounter}.{smrtornot}.FastQC.log") + os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/fastQC.hifi.{readCounter}.{smrtornot}.log") conda: - os.path.join(workflow.basedir, "envs/pigz.yaml") + os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") shell: """ (fastqc {input} -o {params.folder2out} -t {threads}) &> {log} """ -rule multiqc_hifi: +rule multiQC_hifi: input: lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/fastqc/{readCounter}.{smrtornot}_fastqc.html"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], smrtornot=dictSamples[wildcards.sample][1]) params: @@ -118,37 +120,37 @@ rule multiqc_hifi: output: os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/multiqc/{sample}.multiqcReport.html") log: - os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/{sample}.multiqc.log") + os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/multiQC.{sample}.log") conda: - os.path.join(workflow.basedir, "envs/pigz.yaml") + os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") shell: "(multiqc {params.folder2qc} -o {params.folder2OUT} -n {params.filename}) &> {log}" -rule meryl_hifi_count: +rule merylCount_hifi: input: reads=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_trimReads/{readCounter}.{smrtornot}.fastq") params: kmer = "{kmer}" threads: - resource['meryl_hifi_count']['threads'] + resource['merylCount_hifi']['threads'] 
resources: - mem_mb=resource['meryl_hifi_count']['mem_mb'], - time=resource['meryl_hifi_count']['time'], + mem_mb=resource['merylCount_hifi']['mem_mb'], + time=resource['merylCount_hifi']['time'], output: temp(directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/merylDb/" + "{readCounter}" + "_hifi_dB.{smrtornot}.{kmer}.meryl"))), log: - os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/{readCounter}_hifi_{kmer}.{smrtornot}.log") + os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/meryl_hifi_count.{readCounter}.{kmer}.{smrtornot}.log") priority: 10 conda: - os.path.join(workflow.basedir, "envs/merylMerq_2.yaml") + os.path.join(workflow.basedir, "envs/MERYL_MERQURY.yaml") shell: """ (meryl count k={params.kmer} threads={threads} {input.reads} output {output}) &> {log} """ -rule meryl_hifi_build: +rule merylUnion_hifi: input: lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/merylDb/{readCounter}_hifi_dB.{smrtornot}.{kmer}.meryl/"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], kmer=dictSamples[wildcards.sample][0], smrtornot=dictSamples[wildcards.sample][1]) params: @@ -156,18 +158,18 @@ rule meryl_hifi_build: removeReadDIR_trimmed=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_trimReads/"), removeReadDIR_unzipped=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_unzipFastqs/") threads: - resource['meryl_hifi_build']['threads'] + resource['merylUnion_hifi']['threads'] resources: - mem_mb=resource['meryl_hifi_build']['mem_mb'], - time=resource['meryl_hifi_build']['time'], + mem_mb=resource['merylUnion_hifi']['mem_mb'], + time=resource['merylUnion_hifi']['time'], output: directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/merylDb/complete_hifi.{sample}.{kmer}.meryl")), log: - os.path.join(config['Results'], 
"0_buildDatabases/{sample}/hifiReads/logs/{sample}.meryl_hifi.{kmer}.log") + os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/meryl_hifi_combine.{sample}.{kmer}.log") priority: 10 conda: - os.path.join(workflow.basedir, "envs/merylMerq_2.yaml") + os.path.join(workflow.basedir, "envs/MERYL_MERQURY.yaml") shell: """ (meryl union-sum {input} output {output}) &> {log} diff --git a/rules/build_illumina.smk b/rules/build_illumina.smk index c1bb0e9eddc56bb9bf37a8987423d6ed80363956..0680f4a9db7a157cf824298b3311df1d764c459c 100644 --- a/rules/build_illumina.smk +++ b/rules/build_illumina.smk @@ -2,7 +2,13 @@ -localrules: symlinkUnzippedFastq_R1, symlinkUnzippedFastq_R2, symLink_trim10xbarcodes_notrimAdapt, symlinks_no10xwithAdaptTrim, symlinks_no10xOrAdaptTrim, symlink_trim10xbarcodesR2, multiqc_hifi +localrules: symlink_UnzippedFastq_R1_illumina,\ + symlink_UnzippedFastq_R2_illumina, \ + symLink_Trim10xBarcodes_noSequencingAdaptTrim_illumina, \ + symlink_No10xWithSequencingAdaptTrim_illumina, \ + symlink_No10xOrSequencingAdaptTrim_illumina, \ + symlink_Trim10xBarcodes_R2_illumina, \ + multiQC_illumina @@ -19,7 +25,7 @@ def R2_gzipped(wildcards): def R2_notgzipped(wildcards): return noGzip_R2.loc[(wildcards.sample, wildcards.readCounter), "Library_R2"] -rule unzipFastq_R1: +rule unzipFastq_R1_illumina: input: assembly=R1_gzipped, output: @@ -27,18 +33,18 @@ rule unzipFastq_R1: log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.{trim10x}.{trimAdapters}_R1_pigzUnzip.log"), conda: - os.path.join(workflow.basedir, "envs/pigz.yaml") + os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") threads: - resource['unzipFastq_R1']['threads'] + resource['unzipFastq_R1_illumina']['threads'] resources: - mem_mb=resource['unzipFastq_R1']['mem_mb'], - time=resource['unzipFastq_R1']['time'] + mem_mb=resource['unzipFastq_R1_illumina']['mem_mb'], + time=resource['unzipFastq_R1_illumina']['time'] shell: """ pigz -p 
{threads} -c -d -k {input.assembly} > {output} 2> {log} """ -rule symlinkUnzippedFastq_R1: +rule symlink_UnzippedFastq_R1_illumina: input: assembly=R1_notgzipped, output: @@ -50,7 +56,7 @@ rule symlinkUnzippedFastq_R1: ln -s {input} {output} """ -rule unzipFastq_R2: +rule unzipFastq_R2_illumina: input: assembly=R2_gzipped, output: @@ -58,18 +64,18 @@ rule unzipFastq_R2: log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.{trim10x}.{trimAdapters}_R2_pigzUnzip.log") conda: - os.path.join(workflow.basedir, "envs/pigz.yaml") + os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") threads: - resource['unzipFastq_R2']['threads'] + resource['unzipFastq_R2_illumina']['threads'] resources: - mem_mb=resource['unzipFastq_R2']['mem_mb'], - time=resource['unzipFastq_R2']['time'] + mem_mb=resource['unzipFastq_R2_illumina']['mem_mb'], + time=resource['unzipFastq_R2_illumina']['time'] shell: """ pigz -p {threads} -c -d -k {input.assembly} > {output} 2> {log} """ -rule symlinkUnzippedFastq_R2: +rule symlink_UnzippedFastq_R2_illumina: input: assembly=R2_notgzipped, output: @@ -82,27 +88,27 @@ rule symlinkUnzippedFastq_R2: """ -rule trim10xbarcodes: +rule trim10xBarcodes_illumina: input: read1=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.10xTrimmed.{trimAdapters}_R1.fastq"), output: read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.10xTrimmed.{trimAdapters}_Read1.fastq"), threads: - resource['trim10xbarcodes']['threads'] + resource['trim10xBarcodes_illumina']['threads'] resources: - mem_mb=resource['trim10xbarcodes']['mem_mb'], - time=resource['trim10xbarcodes']['time'] + mem_mb=resource['trim10xBarcodes_illumina']['mem_mb'], + time=resource['trim10xBarcodes_illumina']['time'] log: os.path.join(config['Results'], 
"0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.10xTrimmed.10BarcodeRemoval_Trimmomatic.{trimAdapters}.log") conda: - os.path.join(workflow.basedir, "envs/pigz.yaml") + os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") shell: """ (trimmomatic SE -threads {threads} {input.read1} {output.read1} HEADCROP:23) &> {log} """ -rule symlink_trim10xbarcodesR2: +rule symlink_Trim10xBarcodes_R2_illumina: input: read2=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.10xTrimmed.{trimAdapters}_R2.fastq") output: @@ -116,7 +122,7 @@ rule symlink_trim10xbarcodesR2: -rule symLink_trim10xbarcodes_notrimAdapt: +rule symLink_Trim10xBarcodes_noSequencingAdaptTrim_illumina: input: read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_Read1.fastq"), read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_Read2.fastq"), @@ -131,7 +137,7 @@ rule symLink_trim10xbarcodes_notrimAdapt: ln -s {input.read2} {output.read2} """ -rule symlinks_no10xOrAdaptTrim: +rule symlink_No10xOrSequencingAdaptTrim_illumina: input: read1=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.not10xTrimmed.notAdaptTrimmed_R1.fastq"), read2=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.not10xTrimmed.notAdaptTrimmed_R2.fastq") @@ -146,7 +152,7 @@ rule symlinks_no10xOrAdaptTrim: ln -s {input.read2} {output.read2} """ -rule symlinks_no10xwithAdaptTrim: +rule symlink_No10xWithSequencingAdaptTrim_illumina: input: read1=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.not10xTrimmed.AdaptTrimmed_R1.fastq"), read2=os.path.join(config['Results'], 
"0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.not10xTrimmed.AdaptTrimmed_R2.fastq") @@ -161,7 +167,7 @@ rule symlinks_no10xwithAdaptTrim: ln -s {input.read2} {output.read2} """ -rule trimAdapters: +rule trimSequencingAdapters_illumina: input: read1= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_Read1.fastq"), read2= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_Read2.fastq"), @@ -172,20 +178,20 @@ rule trimAdapters: read1=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_val_1.fq")), read2=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_val_2.fq")) threads: - resource['trimAdapters']['threads'] + resource['trimSequencingAdapters_illumina']['threads'] resources: - mem_mb=resource['trimAdapters']['mem_mb'], - time=resource['trimAdapters']['time'], + mem_mb=resource['trimSequencingAdapters_illumina']['mem_mb'], + time=resource['trimSequencingAdapters_illumina']['time'], log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.{trim10x}.AdaptTrimmed_tGalore.log") conda: - os.path.join(workflow.basedir, "envs/pigz.yaml") + os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") shell: """ (trim_galore -j {threads} --basename {params.r1_prefix} --dont_gzip --length 65 -o {params.outputDir} --paired {input.read1} {input.read2}) &> {log} """ -rule fastqc_Illumina: +rule fastQC_illumina: input: read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_1.fq"), read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_2.fq") @@ -197,16 +203,16 @@ 
rule fastqc_Illumina: log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.{trim10x}.{trimAdapters}_fastqc.log") threads: - resource['fastqc_Illumina']['threads'] + resource['fastQC_illumina']['threads'] resources: - mem_mb=resource['fastqc_Illumina']['mem_mb'], - time=resource['fastqc_Illumina']['time'], + mem_mb=resource['fastQC_illumina']['mem_mb'], + time=resource['fastQC_illumina']['time'], conda: - os.path.join(workflow.basedir, "envs/pigz.yaml") + os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") shell: "(fastqc {input} -o {params.folder2out} -t {threads}) &> {log}" -rule multiqc_hifi: +rule multiQC_illumina: input: read1=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/QC/fastqc/{readCounter}.{trim10x}.{trimAdapters}_val_1_fastqc.html"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], trimAdapters=dictSamples[wildcards.sample][2]), read2=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/QC/fastqc/{readCounter}.{trim10x}.{trimAdapters}_val_2_fastqc.html"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], trimAdapters=dictSamples[wildcards.sample][2]) @@ -219,22 +225,22 @@ rule multiqc_hifi: log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{sample}.multiqc.log") conda: - os.path.join(workflow.basedir, "envs/pigz.yaml") + os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") shell: "(multiqc {params.folder2qc} -o {params.folder2out} -n {params.filename}) &> {log}" -rule meryl_R1: +rule merylCount_R1_illumina: input: read1= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_1.fq") params: kmer = "{kmer}", threads: - resource['meryl_R1']['threads'] + 
resource['merylCount_R1_illumina']['threads'] resources: - mem_mb=resource['meryl_R1']['mem_mb'], - time=resource['meryl_R1']['time'], + mem_mb=resource['merylCount_R1_illumina']['mem_mb'], + time=resource['merylCount_R1_illumina']['time'], output: temp(directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/merylDb/{readCounter}.{trim10x}.{trimAdapters}_R1.{kmer}.meryl"))) log: @@ -242,23 +248,23 @@ rule meryl_R1: priority: 10 conda: - os.path.join(workflow.basedir, "envs/merylMerq_2.yaml") + os.path.join(workflow.basedir, "envs/MERYL_MERQURY.yaml") shell: """ export OMP_NUM_THREADS={threads} (meryl count k={params.kmer} threads={threads} {input.read1} output {output}) &> {log} """ -rule meryl_R2: +rule merylCount_R2_illumina: input: read2= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_2.fq") params: kmer = "{kmer}", threads: - resource['meryl_R2']['threads'] + resource['merylCount_R2_illumina']['threads'] resources: - mem_mb=resource['meryl_R2']['mem_mb'], - time=resource['meryl_R2']['time'], + mem_mb=resource['merylCount_R2_illumina']['mem_mb'], + time=resource['merylCount_R2_illumina']['time'], output: temp(directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/merylDb/{readCounter}.{trim10x}.{trimAdapters}_R2.{kmer}.meryl"))) log: @@ -266,7 +272,7 @@ rule meryl_R2: priority: 10 conda: - os.path.join(workflow.basedir, "envs/merylMerq_2.yaml") + os.path.join(workflow.basedir, "envs/MERYL_MERQURY.yaml") shell: """ export OMP_NUM_THREADS={threads} @@ -274,7 +280,7 @@ rule meryl_R2: """ -rule meryl_illumina_build: +rule merylUnion_illumina: input: # removeReads1=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_1.fq"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], 
trim10x=dictSamples[wildcards.sample][1], trimAdapters=dictSamples[wildcards.sample][2]), # removeReads2=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_2.fq"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], trimAdapters=dictSamples[wildcards.sample][2]), @@ -286,10 +292,10 @@ rule meryl_illumina_build: kmer = "{kmer}", path= os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/merylDb/") threads: - resource['meryl_illumina_build']['threads'] + resource['merylUnion_illumina']['threads'] resources: - mem_mb=resource['meryl_illumina_build']['mem_mb'], - time=resource['meryl_illumina_build']['time'], + mem_mb=resource['merylUnion_illumina']['mem_mb'], + time=resource['merylUnion_illumina']['time'], output: directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/merylDb/complete_illumina.{sample}.{kmer}.meryl")), log: @@ -297,7 +303,7 @@ rule meryl_illumina_build: priority: 10 conda: - os.path.join(workflow.basedir, "envs/merylMerq_2.yaml") + os.path.join(workflow.basedir, "envs/MERYL_MERQURY.yaml") shell: """ export OMP_NUM_THREADS={threads} diff --git a/rules/evaluate.smk b/rules/evaluate.smk index c6813378d95810d885b7b141fe7ddb81ed05fbac..2867bd0b5a8b9bbc8e920538262399fb2f3adb54 100644 --- a/rules/evaluate.smk +++ b/rules/evaluate.smk @@ -5,41 +5,42 @@ def merylDB(wildcards): return samples.loc[(wildcards.asmID), "merylDB"] -localrules: symlinkUnzippedHiC_R1, \ - symlinkUnzippedHiC_R2, \ - symlinkUnzippedFasta_PRI, \ - symlinkUnzippedFasta_ALT, \ +localrules: symlink_UnzippedFastq_R1_HiC, \ + symlink_UnzippedFastq_R2_HiC, \ + symlink_UnzippedFasta_PRI, \ + symlink_UnzippedFasta_ALT, \ # symlinkMerylDB, \ # moveBuscoOutputs, \ - saveConfiguration_and_getKeyValues_kmer, \ - saveConfiguration_and_getKeyValues, \ - aggregateAllAssemblies, \ - 
makeReport, \ - pretextMaps2md, \ - addFullTable, \ - aggregateReport, \ - fullTable_heatmap_external_create, \ - fullTable_heatmap_external_createPDF, \ - fullTable_heatmap_internal_create, \ - makePDF_combine + copyKeyResults_GenomeScope2Profiles, \ + copyKeyResults, \ + saveConfigurationAndSampleSheet_createComparisonTablesTSV, \ + IndividualKeyResults_createMD, \ + PretextMaps_createMD, \ + ComparisonTables_createMD, \ + ReportWithoutComparisonTables_createMD, \ + ReportWithoutComparisonTables_createPDF, \ + ComparisonTablesColoured_createHTML, \ + ComparisonTables_createPDF, \ + ComparisonTablesGradient_createHTML, \ + COMBINE_ALL_PDFS def HiC_R1_gzipped(wildcards): return yesGzip_HiC_R1.loc[(wildcards.asmID), "HiC_R1"] -rule unzipHiC_R1: +rule unzipFastq_R1_HiC: input: assembly=HiC_R1_gzipped, output: temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R1.fastq")), log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.R1.pigzUnzip.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/pigzUnzip.HiC.R1.{asmID}.log") conda: - os.path.join(workflow.basedir, "envs/pigz.yaml") + os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") threads: - resource['unzipHiC_R1']['threads'] + resource['unzipFastq_R1_HiC']['threads'] resources: - mem_mb=resource['unzipHiC_R1']['mem_mb'], - time=resource['unzipHiC_R1']['time'], + mem_mb=resource['unzipFastq_R1_HiC']['mem_mb'], + time=resource['unzipFastq_R1_HiC']['time'], shell: """ pigz -p {threads} -c -d -k {input.assembly} > {output} 2> {log} @@ -48,7 +49,7 @@ rule unzipHiC_R1: def HiC_R1_unzipped(wildcards): return noGzip_HiC_R1.loc[(wildcards.asmID), "HiC_R1"] -rule symlinkUnzippedHiC_R1: +rule symlink_UnzippedFastq_R1_HiC: input: assembly=HiC_R1_unzipped, output: @@ -64,20 +65,20 @@ rule symlinkUnzippedHiC_R1: def HiC_R2_gzipped(wildcards): return yesGzip_HiC_R2.loc[(wildcards.asmID), "HiC_R2"] -rule unzipHiC_R2: +rule unzipFastq_R2_HiC: input:
assembly=HiC_R2_gzipped, output: temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R2.fastq")), log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.R2.pigzUnzip.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/pigzUnzip.HiC.R2.{asmID}.log") conda: - os.path.join(workflow.basedir, "envs/pigz.yaml") + os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") threads: - resource['unzipHiC_R2']['threads'] + resource['unzipFastq_R2_HiC']['threads'] resources: - mem_mb=resource['unzipHiC_R2']['mem_mb'], - time=resource['unzipHiC_R2']['time'] + mem_mb=resource['unzipFastq_R2_HiC']['mem_mb'], + time=resource['unzipFastq_R2_HiC']['time'] shell: """ pigz -p {threads} -c -d -k {input.assembly} > {output} 2> {log} @@ -86,7 +87,7 @@ rule unzipHiC_R2: def HiC_R2_unzipped(wildcards): return noGzip_HiC_R2.loc[(wildcards.asmID), "HiC_R2"] -rule symlinkUnzippedHiC_R2: +rule symlink_UnzippedFastq_R2_HiC: input: assembly=HiC_R2_unzipped, output: @@ -102,7 +103,7 @@ rule symlinkUnzippedHiC_R2: -rule pretext_index_PRI_asm: +rule indexFasta_PRI: input: assemblyPRI=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.PRI.fasta"), output: @@ -113,21 +114,21 @@ rule pretext_index_PRI_asm: os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.PRI.fasta.pac"), os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.PRI.fasta.fai") log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.PRI.indexing.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/indexASM.PRI.{asmID}.log") conda: - os.path.join(workflow.basedir, "envs/pretext.yaml") + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") threads: - resource['pretext_index_PRI_asm']['threads'] + resource['indexFasta_PRI']['threads'] resources: - mem_mb=resource['pretext_index_PRI_asm']['mem_mb'], - time=resource['pretext_index_PRI_asm']['time'] +
mem_mb=resource['indexFasta_PRI']['mem_mb'], + time=resource['indexFasta_PRI']['time'] shell: """ (bwa-mem2 index {input.assemblyPRI}) &> {log} (samtools faidx {input.assemblyPRI}) &>> {log} """ -rule pretext_fastq2bam_R1: +rule convertFastqTObam_R1_HiC: input: HiC_R1=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R1.fastq"), assembly=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.PRI.fasta"), @@ -140,20 +141,20 @@ rule pretext_fastq2bam_R1: output: temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R1.bam")) conda: - os.path.join(workflow.basedir, "envs/pretext.yaml") + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") threads: - resource['pretext_fastq2bam_R1']['threads'] + resource['convertFastqTObam_R1_HiC']['threads'] resources: - mem_mb=resource['pretext_fastq2bam_R1']['mem_mb'], - time=resource['pretext_fastq2bam_R1']['time'] + mem_mb=resource['convertFastqTObam_R1_HiC']['mem_mb'], + time=resource['convertFastqTObam_R1_HiC']['time'] log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.fastq2bam.R1.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/fastq2bam.HiC.R1.{asmID}.log") shell: """ (bwa-mem2 mem -t {threads} -B8 {input.assembly} {input.HiC_R1} | samtools view -Sb - > {output}) &> {log} """ -rule pretext_fastq2bam_R2: +rule convertFastqTObam_R2_HiC: input: HiC_R2=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R2.fastq"), assembly=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.PRI.fasta"), @@ -166,20 +167,20 @@ rule pretext_fastq2bam_R2: output: temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R2.bam")) conda: - os.path.join(workflow.basedir, "envs/pretext.yaml") + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") threads: - resource['pretext_fastq2bam_R2']['threads'] + resource['convertFastqTObam_R2_HiC']['threads'] 
resources: - mem_mb=resource['pretext_fastq2bam_R2']['mem_mb'], - time=resource['pretext_fastq2bam_R2']['time'] + mem_mb=resource['convertFastqTObam_R2_HiC']['mem_mb'], + time=resource['convertFastqTObam_R2_HiC']['time'] log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.fastq2bam.R2.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/fastq2bam.HiC.R2.{asmID}.log") shell: """ (bwa-mem2 mem -t {threads} -B8 {input.assembly} {input.HiC_R2} | samtools view -Sb - > {output}) &> {log} """ -rule pretext_filter_5primeEnd_R1: +rule filter5PrimeEnd_R1_HiC: input: HiC_R1_bam=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R1.bam"), assembly=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.PRI.fasta"), @@ -194,20 +195,20 @@ rule pretext_filter_5primeEnd_R1: params: script=os.path.join(workflow.basedir, "scripts/process_HiC/filter_five_end.pl") conda: - os.path.join(workflow.basedir, "envs/pretext.yaml") + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") threads: - resource['pretext_filter_5primeEnd_R1']['threads'] + resource['filter5PrimeEnd_R1_HiC']['threads'] resources: - mem_mb=resource['pretext_filter_5primeEnd_R1']['mem_mb'], - time=resource['pretext_filter_5primeEnd_R1']['time'] + mem_mb=resource['filter5PrimeEnd_R1_HiC']['mem_mb'], + time=resource['filter5PrimeEnd_R1_HiC']['time'] log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.filter5end.R1.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/filtered.HiC.R1.{asmID}.log") shell: """ (samtools view -h {input.HiC_R1_bam}| perl {params.script} | samtools view -@{threads} -Sb - > {output}) &> {log} """ -rule pretext_filter_5primeEnd_R2: +rule filter5PrimeEnd_R2_HiC: input: HiC_R2_bam=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R2.bam"), assembly=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.PRI.fasta"), @@ 
-222,21 +223,21 @@ rule pretext_filter_5primeEnd_R2: params: script=os.path.join(workflow.basedir, "scripts/process_HiC/filter_five_end.pl") conda: - os.path.join(workflow.basedir, "envs/pretext.yaml") + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") threads: - resource['pretext_filter_5primeEnd_R2']['threads'] + resource['filter5PrimeEnd_R2_HiC']['threads'] resources: - mem_mb=resource['pretext_filter_5primeEnd_R2']['mem_mb'], - time=resource['pretext_filter_5primeEnd_R2']['time'] + mem_mb=resource['filter5PrimeEnd_R2_HiC']['mem_mb'], + time=resource['filter5PrimeEnd_R2_HiC']['time'] log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.filter5end.R2.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/filtered.HiC.R2.{asmID}.log") shell: """ (samtools view -h {input.HiC_R2_bam}| perl {params.script} | samtools view -@{threads} -Sb - > {output}) &> {log} """ -rule pretext_filtered_combine: +rule pairAndCombineFiltered_HiC: input: R1=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R1.FILTERED.bam"), R2=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R2.FILTERED.bam") @@ -245,39 +246,39 @@ rule pretext_filtered_combine: params: script=os.path.join(workflow.basedir, "scripts/process_HiC/two_read_bam_combiner.pl") conda: - os.path.join(workflow.basedir, "envs/pretext.yaml") + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") threads: - resource['pretext_filtered_combine']['threads'] + resource['pairAndCombineFiltered_HiC']['threads'] resources: - mem_mb=resource['pretext_filtered_combine']['mem_mb'], - time=resource['pretext_filtered_combine']['time'] + mem_mb=resource['pairAndCombineFiltered_HiC']['mem_mb'], + time=resource['pairAndCombineFiltered_HiC']['time'] log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.COMBINED.FILTERED.log") + os.path.join(config['Results'],
"1_evaluation/{asmID}/logs/combine.filtered.HiC.{asmID}.log") shell: """ (perl {params.script} {input.R1} {input.R2} | samtools view -@{threads} -Sb > {output}) &>{log} """ -rule pretext_map: +rule pretextMap: input: HiC_alignment=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.COMBINED.FILTERED.bam") output: pretextFile=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.COMBINED.FILTERED.pretext") conda: - os.path.join(workflow.basedir, "envs/pretext.yaml") + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") threads: - resource['pretext_map']['threads'] + resource['pretextMap']['threads'] resources: - mem_mb=resource['pretext_map']['mem_mb'], - time=resource['pretext_map']['time'] + mem_mb=resource['pretextMap']['mem_mb'], + time=resource['pretextMap']['time'] log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.COMBINED.FILTERED.PretextMap.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/PretextMap.{asmID}.log") shell: """ (samtools view -h {input.HiC_alignment} | PretextMap -o {output.pretextFile} --sortby length --mapq 10) &> {log} """ -rule pretext_snapshot: +rule pretextSnapshot: input: pretextFile=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.COMBINED.FILTERED.pretext") output: @@ -285,14 +286,14 @@ rule pretext_snapshot: params: outDirectory=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/") conda: - os.path.join(workflow.basedir, "envs/pretext.yaml") + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") threads: - resource['pretext_snapshot']['threads'] + resource['pretextSnapshot']['threads'] resources: - mem_mb=resource['pretext_snapshot']['mem_mb'], - time=resource['pretext_snapshot']['time'] + mem_mb=resource['pretextSnapshot']['mem_mb'], + time=resource['pretextSnapshot']['time'] log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.HiC.COMBINED.FILTERED.PretextSnapshot.log") 
+ os.path.join(config['Results'], "1_evaluation/{asmID}/logs/PretextSnapshot.{asmID}.log") shell: """ (PretextSnapshot -m {input.pretextFile} -o {params.outDirectory}) &> {log} @@ -310,9 +311,9 @@ rule unzipFasta_PRI: output: os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.PRI.fasta"), log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.PRI.pigzUnzip.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/pigzUnzip.PRI.{asmID}.log") conda: - os.path.join(workflow.basedir, "envs/pigz.yaml") + os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml") threads: resource['unzipFasta_PRI']['threads'] resources: @@ -326,7 +327,7 @@ rule unzipFasta_PRI: def PRI_asm_unzipped(wildcards): return noGzip_PRI.loc[(wildcards.asmID), "PRI_asm"] -rule symlinkUnzippedFasta_PRI: +rule symlink_UnzippedFasta_PRI: input: assembly=PRI_asm_unzipped, output: @@ -349,7 +350,7 @@ rule unzipFasta_ALT: output: os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.ALT.fasta"), log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}.ALT.pigzUnzip.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/pigzUnzip.ALT.{asmID}.log") conda: os.path.join(workflow.basedir, "envs/pigz.yaml") threads: @@ -366,7 +367,7 @@ rule unzipFasta_ALT: def ALT_asm_unzipped(wildcards): return noGzip_ALT.loc[(wildcards.asmID), "ALT_asm"] -rule symlinkUnzippedFasta_ALT: +rule symlink_UnzippedFasta_ALT: input: assembly=ALT_asm_unzipped, output: @@ -425,11 +426,11 @@ rule merqury: os.path.join(config['Results'],"1_evaluation/{asmID}/QV.KMER-COMPLETENESS.CN-SPECTRA/{asmID}_merqOutput.spectra-cn.fl.png"), os.path.join(config['Results'],"1_evaluation/{asmID}/QV.KMER-COMPLETENESS.CN-SPECTRA/merylDB_providedFor_{asmID}.hist") log: - os.path.join(config['Results'],"1_evaluation/{asmID}/logs/{asmID}_merqury.log") + os.path.join(config['Results'],"1_evaluation/{asmID}/logs/merqury.{asmID}.log") priority: 3 conda: - 
os.path.join(workflow.basedir, "envs/merylMerq_2.yaml") + os.path.join(workflow.basedir, "envs/MERYL_MERQURY.yaml") shell: """ ln -s {input.merylDB_provided} {params.symlink_merylDB} @@ -459,9 +460,9 @@ rule busco5: output: os.path.join(config['Results'], "1_evaluation/{asmID}/BUSCOs/{asmID}/short_summary.specific." + buscoDataBaseName + "_odb10.{asmID}.txt"), conda: - os.path.join(workflow.basedir, "envs/busco_and_assembly.yaml") + os.path.join(workflow.basedir, "envs/BUSCO.yaml") log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/{asmID}_busco5.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/busco5.{asmID}.log") priority: 20 shell: @@ -474,7 +475,7 @@ rule busco5: -rule genomescope2: +rule GenomeScope2Profiles: input: hist=os.path.join(config['Results'],"1_evaluation/{asmID}/QV.KMER-COMPLETENESS.CN-SPECTRA/merylDB_providedFor_{asmID}.hist"), params: @@ -488,14 +489,14 @@ rule genomescope2: linearPlot=os.path.join(config['Results'], "1_evaluation/{asmID}/GENOMESCOPE_PROFILES/{asmID}_k{kmer}_linear_plot.png"), estimatedSize=os.path.join(config['Results'], "1_evaluation/{asmID}/GENOMESCOPE_PROFILES/{asmID}_k{kmer}_sizeEst.txt") conda: - os.path.join(workflow.basedir, "envs/genomescope.yaml") + os.path.join(workflow.basedir, "envs/AUXILIARY_R_SCRIPTS.yaml") log: - os.path.join(config['Results'],"1_evaluation/{asmID}/logs/{asmID}_k{kmer}_gscopelog.txt") + os.path.join(config['Results'],"1_evaluation/{asmID}/logs/genomescopeProfiles.{asmID}.{kmer}.log") threads: - resource['genomescope2']['threads'] + resource['GenomeScope2Profiles']['threads'] resources: - mem_mb=resource['genomescope2']['mem_mb'], - time=resource['genomescope2']['time'] + mem_mb=resource['GenomeScope2Profiles']['mem_mb'], + time=resource['GenomeScope2Profiles']['time'] shell: """ head -n 10000 {input.hist} > {params.cpHist} @@ -516,7 +517,9 @@ rule assemblyStats: filename="{asmID}", given_size=lambda wildcards: expand("{genomeSize}", 
genomeSize=dictSamples[wildcards.asmID][4]) conda: - os.path.join(workflow.basedir, "envs/python_scripts.yaml") + os.path.join(workflow.basedir, "envs/AUXILIARY_PYTHON_SCRIPTS.yaml") + log: + os.path.join(config['Results'],"1_evaluation/{asmID}/logs/assemblyStats.{asmID}.log") threads: resource['assemblyStats']['threads'] resources: @@ -524,13 +527,13 @@ rule assemblyStats: time=resource['assemblyStats']['time'] shell: """ - python {params.script} {input.assembly} {input.estGenome} {params.filename} {params.given_size} {output.scaffStats} {output.contStats} + (python {params.script} {input.assembly} {input.estGenome} {params.filename} {params.given_size} {output.scaffStats} {output.contStats}) &> {log} """ -rule saveConfiguration_and_getKeyValues_kmer: +rule copyKeyResults_GenomeScope2Profiles: input: gscopeSum=os.path.join(config['Results'], "1_evaluation/{asmID}/GENOMESCOPE_PROFILES/{asmID}_k{kmer}_summary.txt"), gscopeLog=os.path.join(config['Results'], "1_evaluation/{asmID}/GENOMESCOPE_PROFILES/{asmID}_k{kmer}_log_plot.png"), @@ -551,11 +554,11 @@ rule saveConfiguration_and_getKeyValues_kmer: -rule saveConfiguration_and_getKeyValues: +rule copyKeyResults: input: gscopeSum=lambda wildcards: expand(os.path.join(config['Results'], "1_evaluation/{asmID}/GENOMESCOPE_PROFILES/{asmID}_k{kmer}_summary.txt"), asmID=wildcards.asmID, kmer=dictSamples[wildcards.asmID][4]), - gscopeLog=lambda wildcards: expand(os.path.join(config['Results'], "1_evaluation/{asmID}/GENOMESCOPE_PROFILES/{asmID}_k{kmer}_log_plot.png"), asmID=wildcards.asmID, kmer=dictSamples[wildcards.asmID][4]), - gscopeLin=lambda wildcards: expand(os.path.join(config['Results'], "1_evaluation/{asmID}/GENOMESCOPE_PROFILES/{asmID}_k{kmer}_linear_plot.png"), asmID=wildcards.asmID, kmer=dictSamples[wildcards.asmID][4]), + # gscopeLog=lambda wildcards: expand(os.path.join(config['Results'], "1_evaluation/{asmID}/GENOMESCOPE_PROFILES/{asmID}_k{kmer}_log_plot.png"), asmID=wildcards.asmID, 
kmer=dictSamples[wildcards.asmID][4]), + # gscopeLin=lambda wildcards: expand(os.path.join(config['Results'], "1_evaluation/{asmID}/GENOMESCOPE_PROFILES/{asmID}_k{kmer}_linear_plot.png"), asmID=wildcards.asmID, kmer=dictSamples[wildcards.asmID][4]), busco=os.path.join(config['Results'], "1_evaluation/{asmID}/BUSCOs/{asmID}/short_summary.specific." + buscoDataBaseName + "_odb10.{asmID}.txt"), qv=os.path.join(config['Results'],"1_evaluation/{asmID}/QV.KMER-COMPLETENESS.CN-SPECTRA/{asmID}_merqOutput.qv"), completeness=os.path.join(config['Results'],"1_evaluation/{asmID}/QV.KMER-COMPLETENESS.CN-SPECTRA/{asmID}_merqOutput.completeness.stats"), @@ -625,7 +628,7 @@ rule saveConfiguration_and_getKeyValues: rm {params.rowNames} {params.keyValues} """ -rule aggregateAllAssemblies: +rule saveConfigurationAndSampleSheet_createComparisonTablesTSV: input: allResults=expand(os.path.join(config['Results'],"1_evaluation/{asmID}/KEY_RESULTS/{asmID}_aggregatedSTATS.tsv"), asmID=list(dictSamples.keys())), sampleSheet= config['samplesTSV'], @@ -647,9 +650,9 @@ rule aggregateAllAssemblies: sed -i 's/,//g' {output.results} """ -rule makeReport: +rule IndividualKeyResults_createMD: input: - os.path.join(config['Results'],"1_evaluation/{asmID}/KEY_RESULTS/{asmID}_aggregatedResults.tsv"), + os.path.join(config['Results'],"1_evaluation/{asmID}/KEY_RESULTS/{asmID}_aggregatedSTATS.tsv"), lambda wildcards: expand(os.path.join(config['Results'], "1_evaluation/{asmID}/KEY_RESULTS/{asmID}_k{kmer}_log_plot.png"),asmID=wildcards.asmID, kmer=dictSamples[wildcards.asmID][4]), lambda wildcards: expand(os.path.join(config['Results'], "1_evaluation/{asmID}/KEY_RESULTS/{asmID}_k{kmer}_linear_plot.png"),asmID=wildcards.asmID, kmer=dictSamples[wildcards.asmID][4]), os.path.join(config['Results'],"1_evaluation/{asmID}/KEY_RESULTS/{asmID}_merqOutput.qv"), @@ -658,7 +661,7 @@ rule makeReport: os.path.join(config['Results'],"1_evaluation/{asmID}/KEY_RESULTS/{asmID}_merqOutput.{asmID}.PRI.spectra-cn.st.png"), 
os.path.join(config['Results'],"1_evaluation/{asmID}/KEY_RESULTS/{asmID}_merqOutput.spectra-cn.st.png") conda: - os.path.join(workflow.basedir, "envs/python_scripts.yaml") + os.path.join(workflow.basedir, "envs/AUXILIARY_PYTHON_SCRIPTS.yaml") params: "{asmID}", "{kmer}", @@ -670,7 +673,7 @@ rule makeReport: -rule pretextMaps2md: +rule PretextMaps_createMD: input: PretextMap=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.COMBINED.FILTERED_FullMap.png") output: @@ -679,22 +682,22 @@ rule pretextMaps2md: params: assemblyName='{asmID}' conda: - os.path.join(workflow.basedir, "envs/python_scripts.yaml") + os.path.join(workflow.basedir, "envs/AUXILIARY_PYTHON_SCRIPTS.yaml") script: os.path.join(workflow.basedir, "scripts/report/pretextMapsToMarkdown.py") -rule addFullTable: +rule ComparisonTables_createMD: input: results=os.path.join(config['Results'],"1_evaluation/finalResults/TABLE_OF_RESULTS.tsv") output: os.path.join(config['Results'],"1_evaluation/finalResults/TABLE_OF_RESULTS.md"), os.path.join(config['Results'],"1_evaluation/finalResults/TABLE_OF_RESULTS_roundedMB.tsv") conda: - os.path.join(workflow.basedir, "envs/python_scripts.yaml") + os.path.join(workflow.basedir, "envs/AUXILIARY_PYTHON_SCRIPTS.yaml") script: os.path.join(workflow.basedir, "scripts/report/addFullTableForReport.py") -rule fullTable_heatmap_external_create: +rule ComparisonTablesColoured_createHTML: input: os.path.join(config['Results'],"1_evaluation/finalResults/TABLE_OF_RESULTS_roundedMB.tsv") params: @@ -707,11 +710,11 @@ rule fullTable_heatmap_external_create: # mem_mb=resource['fullTable_heatmap_external_create']['mem_mb'], # time=resource['fullTable_heatmap_external_create']['time'] conda: - os.path.join(workflow.basedir, "envs/genomescope.yaml") + os.path.join(workflow.basedir, "envs/AUXILIARY_R_SCRIPTS.yaml") script: os.path.join(workflow.basedir, "scripts/compare_results/fullTable_heatmap_external.R") -rule fullTable_heatmap_internal_create: +rule 
ComparisonTablesGradient_createHTML: input: os.path.join(config['Results'],"1_evaluation/finalResults/TABLE_OF_RESULTS_roundedMB.tsv") params: @@ -724,18 +727,18 @@ rule fullTable_heatmap_internal_create: # mem_mb=resource['fullTable_heatmap_external_create']['mem_mb'], # time=resource['fullTable_heatmap_external_create']['time'] conda: - os.path.join(workflow.basedir, "envs/genomescope.yaml") + os.path.join(workflow.basedir, "envs/AUXILIARY_R_SCRIPTS.yaml") script: os.path.join(workflow.basedir, "scripts/compare_results/fullTable_heatmap_internalComparison.R") -rule fullTable_heatmap_external_createPDF: +rule ComparisonTables_createPDF: input: coloured=os.path.join(config['Results'],"1_evaluation/finalResults/TABLE_OF_RESULTS_COLOURED.html"), gradient=os.path.join(config['Results'],"1_evaluation/finalResults/TABLE_OF_RESULTS_GRADIENT.html") params: css=os.path.join(workflow.basedir, "scripts/report/tableOnSamePage.css") log: - os.path.join(config['Results'], "1_evaluation/logs/fullTable_heatmap_createPDF_pandoc.log") + os.path.join(config['Results'], "1_evaluation/logs/ComparisonTables_createPDF.log") output: coloured=os.path.join(config['Results'],"1_evaluation/finalResults/TABLE_OF_RESULTS_COLOURED.pdf"), gradient=os.path.join(config['Results'],"1_evaluation/finalResults/TABLE_OF_RESULTS_GRADIENT.pdf") @@ -745,7 +748,7 @@ rule fullTable_heatmap_external_createPDF: # mem_mb=resource['fullTable_heatmap_external_createPDF']['mem_mb'], # time=resource['fullTable_heatmap_external_createPDF']['time'] conda: - os.path.join(workflow.basedir, "envs/python_scripts.yaml") + os.path.join(workflow.basedir, "envs/AUXILIARY_PYTHON_SCRIPTS.yaml") shell: """ (pandoc -V papersize:a3 -V margin-top=1.5cm -V margin-left=1.5cm -V margin-right=0 -V margin-bottom=0 -c {params.css} -o {output.coloured} --pdf-engine=wkhtmltopdf {input.coloured}) &>> {log} @@ -753,7 +756,7 @@ rule fullTable_heatmap_external_createPDF: -o {output.gradient} --pdf-engine=wkhtmltopdf --pdf-engine-opt="-O" 
--pdf-engine-opt="Landscape" {input.gradient}) &>> {log} """ -rule aggregateReport: +rule ReportWithoutComparisonTables_createMD: input: indivMD=[expand(os.path.join(config['Results'],"1_evaluation/{asmID}/KEY_RESULTS/{asmID}_k{kmer}_markdownForReport.md"), asmID=key, kmer=value5) for key, [value1, value2, value3, value4, value5, value6, value7, value8, value9, value10, value11, value12, value13, value14, value15, value16] in dictSamples.items()], landingPage=os.path.join(workflow.basedir, "scripts/report/reportLandingPage.md"), @@ -772,7 +775,7 @@ rule aggregateReport: # cat {input.landingPageTABLE} {input.endTableMD} >> {output.endTableMD} -rule makePDF: +rule ReportWithoutComparisonTables_createPDF: input: md_report=os.path.join(config['Results'],"1_evaluation/finalResults/ALL_individual_REPORTS.md"), # md_comparison_table=os.path.join(config['Results'],"1_evaluation/finalResults/FullTableMarkdown_wPreamble.md") @@ -780,21 +783,21 @@ rule makePDF: pdf_report=os.path.join(config['Results'],"1_evaluation/finalResults/ALL_individual_REPORTS.pdf"), # pdf_comparison_table=os.path.join(config['Results'],"1_evaluation/finalResults/FULL_TABLE_PDF.pdf") log: - os.path.join(config['Results'], "1_evaluation/logs/MAKEPDF_pandoc.log") + os.path.join(config['Results'], "1_evaluation/logs/ReportWithoutComparisonTables_createPDF.log") conda: - os.path.join(workflow.basedir, "envs/python_scripts.yaml") - threads: - resource['makePDF']['threads'] - resources: - mem_mb=resource['makePDF']['mem_mb'], - time=resource['makePDF']['time'] + os.path.join(workflow.basedir, "envs/AUXILIARY_PYTHON_SCRIPTS.yaml") + # threads: + # resource['ReportWithoutComparisonTables_createPDF']['threads'] + # resources: + # mem_mb=resource['ReportWithoutComparisonTables_createPDF']['mem_mb'], + # time=resource['ReportWithoutComparisonTables_createPDF']['time'] shell: """ (pandoc -o {output.pdf_report} {input.md_report} --pdf-engine=tectonic) &>> {log} """ # (pandoc -o {output.pdf_comparison_table} 
{input.md_comparison_table} --pdf-engine=tectonic) &>> {log} -rule makePDF_combine: +rule COMBINE_ALL_PDFS: input: pdf_report=os.path.join(config['Results'],"1_evaluation/finalResults/ALL_individual_REPORTS.pdf"), coloured=os.path.join(config['Results'],"1_evaluation/finalResults/TABLE_OF_RESULTS_COLOURED.pdf"), @@ -804,9 +807,9 @@ rule makePDF_combine: pdf_report=os.path.join(config['Results'],"1_evaluation/finalResults/FINAL_REPORT.pdf"), # pdf_comparison_table=os.path.join(config['Results'],"1_evaluation/finalResults/FULL_TABLE_PDF.pdf") log: - os.path.join(config['Results'], "1_evaluation/logs/MAKEPDF_combine.log") + os.path.join(config['Results'], "1_evaluation/logs/COMBINE_ALL_PDFS.log") conda: - os.path.join(workflow.basedir, "envs/python_scripts.yaml") + os.path.join(workflow.basedir, "envs/AUXILIARY_PYTHON_SCRIPTS.yaml") # threads: # resource['makePDF']['threads'] # resources: