diff --git a/rules/build_illumina.smk b/rules/build_illumina.smk index dd88c0a473fc06245eb7796d12b0d0971caae72e..c1bb0e9eddc56bb9bf37a8987423d6ed80363956 100644 --- a/rules/build_illumina.smk +++ b/rules/build_illumina.smk @@ -23,7 +23,7 @@ rule unzipFastq_R1: input: assembly=R1_gzipped, output: - os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.{trim10x}.{trimAdapters}_R1.fastq"), + os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.{trim10x}.{trimAdapters}_R1.fastq"), log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.{trim10x}.{trimAdapters}_R1_pigzUnzip.log"), conda: @@ -42,7 +42,7 @@ rule symlinkUnzippedFastq_R1: input: assembly=R1_notgzipped, output: - os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.{trim10x}.{trimAdapters}_R1.fastq"), + os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.{trim10x}.{trimAdapters}_R1.fastq"), container: None shell: @@ -54,7 +54,7 @@ rule unzipFastq_R2: input: assembly=R2_gzipped, output: - os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.{trim10x}.{trimAdapters}_R2.fastq"), + os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.{trim10x}.{trimAdapters}_R2.fastq"), log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.{trim10x}.{trimAdapters}_R2_pigzUnzip.log") conda: @@ -73,7 +73,7 @@ rule symlinkUnzippedFastq_R2: input: assembly=R2_notgzipped, output: - os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.{trim10x}.{trimAdapters}_R2.fastq"), + os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.{trim10x}.{trimAdapters}_R2.fastq"), container: None shell: @@ -84,9 +84,9 @@ rule symlinkUnzippedFastq_R2: rule trim10xbarcodes: input: - read1=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.10xTrimmed.{trimAdapters}_R1.fastq"), + read1=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.10xTrimmed.{trimAdapters}_R1.fastq"), output: - read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.10xTrimmed.{trimAdapters}_Read1.fastq"), + read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.10xTrimmed.{trimAdapters}_Read1.fastq"), threads: resource['trim10xbarcodes']['threads'] resources: @@ -104,9 +104,9 @@ rule trim10xbarcodes: rule symlink_trim10xbarcodesR2: input: - read2=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.10xTrimmed.{trimAdapters}_R2.fastq") + read2=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.10xTrimmed.{trimAdapters}_R2.fastq") output: - read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.10xTrimmed.{trimAdapters}_Read2.fastq") + read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.10xTrimmed.{trimAdapters}_Read2.fastq") container: None shell: @@ -118,11 +118,11 @@ rule symlink_trim10xbarcodesR2: rule symLink_trim10xbarcodes_notrimAdapt: input: - read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_Read1.fastq"), - read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_Read2.fastq"), + read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_Read1.fastq"), + read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_Read2.fastq"), output: - read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_val_1.fq"), - read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_val_2.fq") + read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_val_1.fq"), + read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.10xTrimmed.notAdaptTrimmed_val_2.fq") container: None shell: @@ -133,11 +133,11 @@ rule symLink_trim10xbarcodes_notrimAdapt: rule symlinks_no10xOrAdaptTrim: input: - read1=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.not10xTrimmed.notAdaptTrimmed_R1.fastq"), - read2=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.not10xTrimmed.notAdaptTrimmed_R2.fastq") + read1=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.not10xTrimmed.notAdaptTrimmed_R1.fastq"), + read2=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.not10xTrimmed.notAdaptTrimmed_R2.fastq") output: - read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.not10xTrimmed.notAdaptTrimmed_val_1.fq"), - read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.not10xTrimmed.notAdaptTrimmed_val_2.fq") + read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.not10xTrimmed.notAdaptTrimmed_val_1.fq"), + read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.not10xTrimmed.notAdaptTrimmed_val_2.fq") container: None shell: @@ -148,11 +148,11 @@ rule symlinks_no10xOrAdaptTrim: rule symlinks_no10xwithAdaptTrim: input: - read1=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.not10xTrimmed.AdaptTrimmed_R1.fastq"), - read2=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/{readCounter}.not10xTrimmed.AdaptTrimmed_R2.fastq") + read1=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.not10xTrimmed.AdaptTrimmed_R1.fastq"), + read2=os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/{readCounter}.not10xTrimmed.AdaptTrimmed_R2.fastq") output: - read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.not10xTrimmed.AdaptTrimmed_Read1.fastq"), - read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.not10xTrimmed.AdaptTrimmed_Read2.fastq") + read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.not10xTrimmed.AdaptTrimmed_Read1.fastq"), + read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.not10xTrimmed.AdaptTrimmed_Read2.fastq") container: None shell: @@ -163,14 +163,14 @@ rule symlinks_no10xwithAdaptTrim: rule trimAdapters: input: - read1= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_Read1.fastq"), - read2= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_Read2.fastq"), + read1= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_Read1.fastq"), + read2= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_Read2.fastq"), params: - outputDir=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/"), + outputDir=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/"), r1_prefix="{readCounter}.{trim10x}.AdaptTrimmed", output: - read1=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_val_1.fq")), - read2=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_val_2.fq")) + read1=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_val_1.fq")), + read2=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.AdaptTrimmed_val_2.fq")) threads: resource['trimAdapters']['threads'] resources: @@ -187,13 +187,13 @@ rule trimAdapters: rule fastqc_Illumina: input: - read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_1.fq"), - read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_2.fq") + read1=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_1.fq"), + read2=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_2.fq") params: - folder2out=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/04_fastqc") + folder2out=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/QC/fastqc") output: - os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/04_fastqc/{readCounter}.{trim10x}.{trimAdapters}_val_1_fastqc.html"), - os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/04_fastqc/{readCounter}.{trim10x}.{trimAdapters}_val_2_fastqc.html") + os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/QC/fastqc/{readCounter}.{trim10x}.{trimAdapters}_val_1_fastqc.html"), + os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/QC/fastqc/{readCounter}.{trim10x}.{trimAdapters}_val_2_fastqc.html") log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.{trim10x}.{trimAdapters}_fastqc.log") threads: @@ -208,16 +208,16 @@ rule fastqc_Illumina: rule multiqc_hifi: input: - read1=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/04_fastqc/{readCounter}.{trim10x}.{trimAdapters}_val_1_fastqc.html"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], trimAdapters=dictSamples[wildcards.sample][2]), - read2=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/04_fastqc/{readCounter}.{trim10x}.{trimAdapters}_val_2_fastqc.html"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], trimAdapters=dictSamples[wildcards.sample][2]) + read1=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/QC/fastqc/{readCounter}.{trim10x}.{trimAdapters}_val_1_fastqc.html"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], trimAdapters=dictSamples[wildcards.sample][2]), + read2=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/QC/fastqc/{readCounter}.{trim10x}.{trimAdapters}_val_2_fastqc.html"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], trimAdapters=dictSamples[wildcards.sample][2]) params: - folder2qc=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/04_fastqc/"), - folder2out=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/05_multiqc/"), - filename="{sample}.{trim10x}.{trimAdapters}.multiqcReport.html" + folder2qc=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/QC/fastqc/"), + folder2out=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/QC/multiqc/"), + filename="{sample}.multiqcReport.html" output: - os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/05_multiqc/{sample}.{trim10x}.{trimAdapters}.multiqcReport.html") + os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/QC/multiqc/{sample}.multiqcReport.html") log: - os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{sample}.{trim10x}.{trimAdapters}_multiqc.log") + os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{sample}.multiqc.log") conda: os.path.join(workflow.basedir, "envs/pigz.yaml") shell: @@ -227,7 +227,7 @@ rule multiqc_hifi: rule meryl_R1: input: - read1= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_1.fq") + read1= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_1.fq") params: kmer = "{kmer}", threads: @@ -236,7 +236,7 @@ rule meryl_R1: mem_mb=resource['meryl_R1']['mem_mb'], time=resource['meryl_R1']['time'], output: - directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/03_merylDb/{readCounter}.{trim10x}.{trimAdapters}_R1.{kmer}.meryl")) + temp(directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/merylDb/{readCounter}.{trim10x}.{trimAdapters}_R1.{kmer}.meryl"))) log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.{trim10x}.{trimAdapters}_meryl_R1.{kmer}.log") priority: @@ -251,7 +251,7 @@ rule meryl_R1: rule meryl_R2: input: - read2= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_2.fq") + read2= os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_2.fq") params: kmer = "{kmer}", threads: @@ -260,7 +260,7 @@ rule meryl_R2: mem_mb=resource['meryl_R2']['mem_mb'], time=resource['meryl_R2']['time'], output: - directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/03_merylDb/{readCounter}.{trim10x}.{trimAdapters}_R2.{kmer}.meryl")) + temp(directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/merylDb/{readCounter}.{trim10x}.{trimAdapters}_R2.{kmer}.meryl"))) log: os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{readCounter}.{trim10x}.{trimAdapters}_meryl_R2.{kmer}.log") priority: @@ -276,24 +276,24 @@ rule meryl_R2: rule meryl_illumina_build: input: - removeReads1=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_1.fq"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], trimAdapters=dictSamples[wildcards.sample][2]), - removeReads2=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_2.fq"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], trimAdapters=dictSamples[wildcards.sample][2]), - read1=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/03_merylDb/{readCounter}.{trim10x}.{trimAdapters}_R1.{kmer}.meryl"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], kmer=dictSamples[wildcards.sample][0], trimAdapters=dictSamples[wildcards.sample][2]), - read2=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/03_merylDb/{readCounter}.{trim10x}.{trimAdapters}_R2.{kmer}.meryl"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], kmer=dictSamples[wildcards.sample][0], trimAdapters=dictSamples[wildcards.sample][2]) + # removeReads1=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_1.fq"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], trimAdapters=dictSamples[wildcards.sample][2]), + # removeReads2=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/{readCounter}.{trim10x}.{trimAdapters}_val_2.fq"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], trimAdapters=dictSamples[wildcards.sample][2]), + read1=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/merylDb/{readCounter}.{trim10x}.{trimAdapters}_R1.{kmer}.meryl"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], kmer=dictSamples[wildcards.sample][0], trimAdapters=dictSamples[wildcards.sample][2]), + read2=lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/merylDb/{readCounter}.{trim10x}.{trimAdapters}_R2.{kmer}.meryl"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], trim10x=dictSamples[wildcards.sample][1], kmer=dictSamples[wildcards.sample][0], trimAdapters=dictSamples[wildcards.sample][2]) params: - removeReadDIR_trimmed=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/02_trimReads/"), - removeReadDIR_unzipped=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/01_unzipFastqs/"), + removeReadDIR_trimmed=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_trimReads/"), + removeReadDIR_unzipped=os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/temp_unzipFastqs/"), kmer = "{kmer}", - path= os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/03_merylDb/") + path= os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/merylDb/") threads: resource['meryl_illumina_build']['threads'] resources: mem_mb=resource['meryl_illumina_build']['mem_mb'], time=resource['meryl_illumina_build']['time'], output: - directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/03_merylDb/complete_{sample}_illuminaDb.{trim10x}.{trimAdapters}.{kmer}.meryl")), + directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/merylDb/complete_illumina.{sample}.{kmer}.meryl")), log: - os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{sample}_hifi_{kmer}.{trim10x}.{trimAdapters}.log") + os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/logs/{sample}.illumina_meryl.{kmer}.log") priority: 10 conda: