diff --git a/rules/build_hifi.smk b/rules/build_hifi.smk index bc1ce1c5eb702c77417628dcdecc66f4ae3f0819..6c616e58a4e259e71088011e4524aa41c7fb7493 100644 --- a/rules/build_hifi.smk +++ b/rules/build_hifi.smk @@ -3,12 +3,12 @@ localrules: symlinkUnzippedHifi, symlinkfornotSmartTrimmed, multiqc_hifi -def fq_to_trimSMRTbell(wildcards): - return trimSMRTbell.loc[(wildcards.sample, wildcards.readCounter), "hifi_reads"] - - -def fq_to_notTrimSMRTbell(wildcards): - return notrimSMRTbell.loc[(wildcards.sample, wildcards.readCounter), "hifi_reads"] +# def fq_to_trimSMRTbell(wildcards): +# return trimSMRTbell.loc[(wildcards.sample, wildcards.readCounter), "hifi_reads"] +# +# +# def fq_to_notTrimSMRTbell(wildcards): +# return notrimSMRTbell.loc[(wildcards.sample, wildcards.readCounter), "hifi_reads"] def hifi_gzipped(wildcards): @@ -24,7 +24,7 @@ rule unzipHifi: input: fastq=hifi_gzipped, output: - temp(os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/01_unzipFastqs/{readCounter}.{smrtornot}.fastq")), + os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/temp_unzipFastqs/{readCounter}.{smrtornot}.fastq"), log: os.path.join(config['Results'], "0_buildDatabases/{sample}/logs/hifiReads/{readCounter}.{smrtornot}_pigzUnzip.log") conda: @@ -43,7 +43,7 @@ rule symlinkUnzippedHifi: input: fastq=hifi_notgzipped, output: - temp(os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/01_unzipFastqs/{readCounter}.{smrtornot}.fastq")), + os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/temp_unzipFastqs/{readCounter}.{smrtornot}.fastq"), log: os.path.join(config['Results'], "0_buildDatabases/{sample}/logs/hifiReads/{readCounter}.{smrtornot}_pigzUnzip.log") container: @@ -56,9 +56,9 @@ rule symlinkUnzippedHifi: rule trimSMRTbell: input: - os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/01_unzipFastqs/{readCounter}.smrtTrimmed.fastq"), + os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/temp_unzipFastqs/{readCounter}.smrtTrimmed.fastq"), output: - outputFile=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/02_trimReads/{readCounter}.smrtTrimmed.fastq")) + outputFile=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_trimReads/{readCounter}.smrtTrimmed.fastq") threads: resource['trimSMRTbell']['threads'] resources: @@ -77,9 +77,9 @@ rule trimSMRTbell: rule symlinkfornotSmartTrimmed: input: - os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/01_unzipFastqs/{readCounter}.notsmrtTrimmed.fastq"), + os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/temp_unzipFastqs/{readCounter}.notsmrtTrimmed.fastq"), output: - outputFile=temp(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/02_trimReads/{readCounter}.notsmrtTrimmed.fastq")) + outputFile=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_trimReads/{readCounter}.notsmrtTrimmed.fastq") container: None shell: @@ -89,11 +89,11 @@ rule symlinkfornotSmartTrimmed: rule fastqc_hifi: input: - os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/02_trimReads/{readCounter}.{smrtornot}.fastq") + os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_trimReads/{readCounter}.{smrtornot}.fastq") params: - folder2out=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/05_multiqc/") + folder2out=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/fastqc") output: - os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/05_multiqc/{readCounter}.{smrtornot}_fastqc.html") + os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/fastqc/{readCounter}.{smrtornot}_fastqc.html") threads: resource['trimSMRTbell']['threads'] resources: @@ -105,29 +105,29 @@ rule fastqc_hifi: os.path.join(workflow.basedir, "envs/pigz.yaml") shell: """ - mkdir {params.folder2out} (fastqc {input} -o {params.folder2out} -t {threads}) &> {log} """ rule multiqc_hifi: input: - lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/05_multiqc/{readCounter}.{smrtornot}_fastqc.html"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], smrtornot=dictSamples[wildcards.sample][1]) + lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/fastqc/{readCounter}.{smrtornot}_fastqc.html"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], smrtornot=dictSamples[wildcards.sample][1]) params: - folder2qc=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/05_multiqc/"), - filename="{sample}.{smrtornot}.multiqcReport.html" + folder2qc=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/fastqc/"), + folder2OUT=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/multiqc/"), + filename="{sample}.multiqcReport.html" output: - os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/05_multiqc/{sample}.{smrtornot}.multiqcReport.html") + os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/multiqc/{sample}.multiqcReport.html") log: - os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/{sample}.{smrtornot}.multiqc.log") + os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/{sample}.multiqc.log") conda: os.path.join(workflow.basedir, "envs/pigz.yaml") shell: - "(multiqc {params.folder2qc} -o {params.folder2qc} -n {params.filename}) &> {log}" + "(multiqc {params.folder2qc} -o {params.folder2OUT} -n {params.filename}) &> {log}" rule meryl_hifi_count: input: - reads=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/02_trimReads/{readCounter}.{smrtornot}.fastq") + reads=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_trimReads/{readCounter}.{smrtornot}.fastq") params: kmer = "{kmer}" threads: @@ -136,7 +136,7 @@ rule meryl_hifi_count: mem_mb=resource['meryl_hifi_count']['mem_mb'], time=resource['meryl_hifi_count']['time'], output: - temp(directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/03_merylDb/" + "{readCounter}" + "_hifi_dB.{smrtornot}.{kmer}.meryl"))), + temp(directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/merylDb/" + "{readCounter}" + "_hifi_dB.{smrtornot}.{kmer}.meryl"))), log: os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/{readCounter}_hifi_{kmer}.{smrtornot}.log") priority: @@ -150,18 +150,20 @@ rule meryl_hifi_count: rule meryl_hifi_build: input: - lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/03_merylDb/{readCounter}_hifi_dB.{smrtornot}.{kmer}.meryl/"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], kmer=dictSamples[wildcards.sample][0], smrtornot=dictSamples[wildcards.sample][1]) + lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/merylDb/{readCounter}_hifi_dB.{smrtornot}.{kmer}.meryl/"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], kmer=dictSamples[wildcards.sample][0], smrtornot=dictSamples[wildcards.sample][1]) params: - kmer = "{kmer}" + kmer = "{kmer}", + removeReadDIR_trimmed=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_trimReads/"), + removeReadDIR_unzipped=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_unzipFastqs/") threads: resource['meryl_hifi_build']['threads'] resources: mem_mb=resource['meryl_hifi_build']['mem_mb'], time=resource['meryl_hifi_build']['time'], output: - directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/03_merylDb/complete_hifi_{sample}_dB.{smrtornot}.{kmer}.meryl")), + directory(os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/merylDb/complete_hifi.{sample}.{kmer}.meryl")), log: - os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/{sample}_hifi_{smrtornot}.{kmer}.log") + os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/{sample}.meryl_hifi.{kmer}.log") priority: 10 conda: @@ -169,4 +171,6 @@ rule meryl_hifi_build: shell: """ (meryl union-sum {input} output {output}) &> {log} + rm -r {params.removeReadDIR_trimmed} + rm -r {params.removeReadDIR_unzipped} """