diff --git a/project/config/config.yaml b/project/config/config.yaml index 9d229a0f580ba04b94a0159e40e32d36eb8f63f4..3503ca1b23b7dc8c55bd3f80b02859b86c8ef710 100644 --- a/project/config/config.yaml +++ b/project/config/config.yaml @@ -2,10 +2,10 @@ samples: "resources/sample/sample.tsv" human_reference: "resources/reference/human.fa" index: "results/reference/index" -bowtieparams: "-q" +bowtie_params: "-q" kraken_db: "/buffer/ag_bsc/pmsb_workflows_2022/kraken_standard_db/standard_db" viral_reference_alignment: "resources/reference/SARS-CoV-2.fasta" shiver_config_file: "config/shiver_config.sh" viral_sequencing_adapters: "resources/adapters/SARS-CoV-2.fasta" viral_sequencing_primers: "resources/primers/SARS-CoV-2.fasta" -fastp_params: "--adapter_sequence TCTCGGTC --adapter_sequence_r2 GCGGACTT" \ No newline at end of file +fastp_params: "--adapter_sequence TCTCGGTC --adapter_sequence_r2 GCGGACTT" diff --git a/project/workflow/rules/IVA.smk b/project/workflow/rules/IVA.smk index 7217bd98b1b61998ce3e1f860621a8e1f8ea789d..c69da72e975a7f9477275d82598c1ae18e0837f1 100644 --- a/project/workflow/rules/IVA.smk +++ b/project/workflow/rules/IVA.smk @@ -1,15 +1,17 @@ rule IVA: + message: + "Genome Assembly..." input: fq1="results/unmapped_fastq/{sample}_1_unmapped.fastq", fq2="results/unmapped_fastq/{sample}_2_unmapped.fastq" output: - directory("results/IVA/{sample}/") + IVA_dir=directory("results/IVA/{sample}/") log: "logs/IVA/{sample}.log" threads: 16 conda: "../envs/yourenv.yaml" shell: - "iva -f {input.fq1} -r {input.fq2} {output} --threads {threads} 2> {log}" + "iva -f {input.fq1} -r {input.fq2} {output.IVA_dir} --threads {threads} 2> {log}" \ No newline at end of file diff --git a/project/workflow/rules/bowtie2.smk b/project/workflow/rules/bowtie2.smk index 12551c4786501419fa02d8e18ea39400358ec5fe..4150eabbbc63d80ecd2d341e00ba02de7f55094c 100644 --- a/project/workflow/rules/bowtie2.smk +++ b/project/workflow/rules/bowtie2.smk @@ -1,20 +1,24 @@ rule refindex: + message: + "Building mapping index..." input: prefix=config["human_reference"] params: prefix=config["index"] output: - "results/reference/index.1.bt2" + "results/reference/index.bt2" log: "logs/bowtie2/refindex/index.1.log" threads:4 conda: "../envs/yourenv.yaml" shell: - "v1.3.2/bio/bowtie2/build" + "bowtie2-build {input} {params.prefix} > {output}" rule map: + message: + "Mapping reads to human reference..." input: idx=multiext( "results/reference/index", @@ -27,7 +31,7 @@ rule map: ), sample=["results/fastp/trimmed/{sample}.1.fastq", "results/fastp/trimmed/{sample}.2.fastq"] params: - extra=config["bowtieparams"] + extra=config["bowtie_params"] output: "results/bam/{sample}.bam" threads:4 diff --git a/project/workflow/rules/fastp.smk b/project/workflow/rules/fastp.smk index 51a51673ffa7da1633dc0daface1c96b055f46fe..b9929f94fd452d23f88ad38906e87f4893958b0d 100644 --- a/project/workflow/rules/fastp.smk +++ b/project/workflow/rules/fastp.smk @@ -1,16 +1,18 @@ rule fastp: + message: + "Quality controlling raw data..." input: sample=["Data/SARS-Cov-2/{sample}_1.fastq.gz", "Data/SARS-Cov-2/{sample}_2.fastq.gz"] output: trimmed=["results/fastp/trimmed/{sample}.1.fastq", "results/fastp/trimmed/{sample}.2.fastq"], - html="report/fastp/{sample}.html", - json="report/fastp/{sample}.json" + html="results/fastp/report/{sample}.html", + json="results/fastp/report/{sample}.json" log: "logs/fastp/{sample}.log" conda: "../envs/yourenv.yaml" params: - extra=config["fastp_params] + extra=config["fastp_params"] threads: 4 wrapper: "v1.3.2/bio/fastp" \ No newline at end of file diff --git a/project/workflow/rules/fastqc.smk b/project/workflow/rules/fastqc.smk new file mode 100644 index 0000000000000000000000000000000000000000..291c0bf428858daa0b308294ca7a41d913a07e86 --- /dev/null +++ b/project/workflow/rules/fastqc.smk @@ -0,0 +1,54 @@ +rule rawfastqc1: + input: + rf1 = "Data/SARS-Cov-2/{sample}_1.fastq.gz" + output: + html="results/fastqc/raw/{sample}_1_fastqc.html", + zip="results/fastqc/raw/{sample}_1_fastqc.zip" + conda: + "../envs/yourenv.yaml" + log: + "logs/fastqc/{sample}.log" + wrapper: + "v1.3.1/bio/fastqc" + + +rule rawfastqc2: + input: + rf2 = "Data/SARS-Cov-2/{sample}_2.fastq.gz" + output: + html="results/fastqc/raw/{sample}_2_fastqc.html", + zip="results/fastqc/raw/{sample}_2_fastqc.zip" + conda: + "../envs/yourenv.yaml" + log: + "logs/fastqc/{sample}.log" + wrapper: + "v1.3.1/bio/fastqc" + + +rule trimmedfastqc1: + input: + sample=["results/fastp/trimmed/{sample}.1.fastq","results/fastp/trimmed/{sample}.2.fastq"] + output: + html="results/fastqc/trimmed/{sample}_1_fastqc.html", + zip="results/fastqc/trimmed/{sample}_1_fastqc.zip" + conda: + "../envs/yourenv.yaml" + log: + "logs/fastqc/trimmed_{sample}.log" + wrapper: + "v1.3.1/bio/fastqc" + + +rule trimmedfastqc2: + input: + sample=["results/fastp/trimmed/{sample}.1.fastq","results/fastp/trimmed/{sample}.2.fastq"] + output: + html="results/fastqc/trimmed/{sample}_2_fastqc.html", + zip="results/fastqc/trimmed/{sample}_2_fastqc.zip" + conda: + "../envs/yourenv.yaml" + log: + "logs/fastqc/trimmed_{sample}.log" + wrapper: + "v1.3.1/bio/fastqc" diff --git a/project/workflow/rules/kraken.smk b/project/workflow/rules/kraken.smk index b745ea4f1d0f5a1074b8413dae4d590ea443d321..a0e71dffc227a93a7b1860decb442c3105c70325 100644 --- a/project/workflow/rules/kraken.smk +++ b/project/workflow/rules/kraken.smk @@ -1,6 +1,8 @@ rule build: + message: + "Building Kraken database..." output: - "krakendb" + database="krakendb" threads: 64 conda: "../envs/yourenv.yaml" @@ -18,8 +20,10 @@ rule build: """ rule report_trimmedreads: + message: + "Creating Kraken report using trimmed reads..." input: - db=config["kraken_db"], + database=config["kraken_db"], sample=["results/fastp/trimmed/{sample}.1.fastq", "results/fastp/trimmed/{sample}.2.fastq"] output: output="results/kraken/{sample}_trimmed.kraken", @@ -30,11 +34,13 @@ rule report_trimmedreads: log: "logs/kraken/report/{sample}_trimmed.log" shell: - "kraken2 --threads {threads} --db {input.db} --paired {input.sample} --report {output.report} > {output.output} 2> {log}" + "kraken2 --threads {threads} --db {input.database} --paired {input.sample} --report {output.report} > {output.output} 2> {log}" rule report_unmappedreads: + message: + "Creating Kraken report using unmapped reads..." input: - db="/buffer/ag_bsc/pmsb_workflows_2022/kraken_standard_db/standard_db", + database="/buffer/ag_bsc/pmsb_workflows_2022/kraken_standard_db/standard_db", sample=["results/unmapped_fastq/{sample}_1_unmapped.fastq", "results/unmapped_fastq/{sample}_2_unmapped.fastq"] output: output="results/kraken/{sample}_unmapped.kraken", @@ -45,4 +51,4 @@ rule report_unmappedreads: log: "logs/kraken/report/{sample}_unmapped.log" shell: - "kraken2 --threads {threads} --db {input.db} --paired {input.sample} --report {output.report} > {output.output} 2> {log}" \ No newline at end of file + "kraken2 --threads {threads} --db {input.database} --paired {input.sample} --report {output.report} > {output.output} 2> {log}" \ No newline at end of file diff --git a/project/workflow/rules/multiqc.smk b/project/workflow/rules/multiqc.smk index 86f269bcdb6d041ec15d11c55175c1486c653f60..9fcbf14bb96f2158de7f711162df09d6ed4742ad 100644 --- a/project/workflow/rules/multiqc.smk +++ b/project/workflow/rules/multiqc.smk @@ -1,12 +1,16 @@ rule multiqc: + message: + "Creating MultiQC report..." input: - "results/" + results="results/", + fastp="results/fastp/report/" output: - "results/multiqc/multiqc.html" + report="results/multiqc/multiqc.html" conda: "../envs/yourenv.yaml" shell: """ - multiqc -n multiqc.html {input} - mv multiqc.html {output} + multiqc -n multiqc.html {input.results} {input.fastp} + mv multiqc.html {output.report} + mv multiqc_data/ results/ """ \ No newline at end of file diff --git a/project/workflow/rules/quast.smk b/project/workflow/rules/quast.smk index 5b3215becd3e4a4d3d46fd6563e86a1e6b0a6b2c..7d518182ce3f466da2f2d2eb781ce642b6244e60 100644 --- a/project/workflow/rules/quast.smk +++ b/project/workflow/rules/quast.smk @@ -1,18 +1,22 @@ rule quast_IVA: + message: + "Creating QUAST report for IVA contigs..." input: - "results/IVA/{sample}/contigs.fasta" + contigs="results/IVA/{sample}/contigs.fasta" output: - directory("results/quast_IVA/{sample}") + quast_dir=directory("results/quast_IVA/{sample}") conda: "../envs/quast.yaml" threads: 4 log: "logs/quast_IVA/{sample}.log" shell: - "quast -o {output} -t {threads} {input} 2> {log}" + "quast -o {output.quast_dir} -t {threads} {input.contigs} 2> {log}" rule quast_shiver: + message: + "Creating QUAST report for shiver consensus genome..." input: consensus_genome="results/shiver/{sample}/{sample}.fasta", quast_ref_genome=config["viral_reference_alignment"] diff --git a/project/workflow/rules/samtools.smk b/project/workflow/rules/samtools.smk index f3a347a347c8c0f79e0ddd5478bafb7166ac8d30..5132b0d5564c6c1fea0b0d64eab6fa83228bc3e9 100644 --- a/project/workflow/rules/samtools.smk +++ b/project/workflow/rules/samtools.smk @@ -1,6 +1,8 @@ rule fastq: + message: + "Extracting unmapped reads..." input: - "results/bam/{sample}.bam" + bam="results/bam/{sample}.bam" output: fq1="results/unmapped_fastq/{sample}_1_unmapped.fastq", fq2="results/unmapped_fastq/{sample}_2_unmapped.fastq" @@ -11,7 +13,7 @@ rule fastq: "../envs/yourenv.yaml" shell: """ - samtools view --threads={threads} -b -f4 {input} > results/bam/{wildcards.sample}_unmapped.bam 2> {log} + samtools view --threads={threads} -b -f4 {input.bam} > results/bam/{wildcards.sample}_unmapped.bam 2> {log} samtools sort -n results/bam/{wildcards.sample}_unmapped.bam -o results/bam/{wildcards.sample}_sorted.bam 2>> {log} bedtools bamtofastq -i results/bam/{wildcards.sample}_sorted.bam -fq {output.fq1} -fq2 {output.fq2} 2>> {log} """ diff --git a/project/workflow/rules/shiver.smk b/project/workflow/rules/shiver.smk index 2e06a0a1a1ea0309d6689757e99edb096187c5f1..4229d3f7320e85a0cb77ff4ea98e4725246a7ce3 100644 --- a/project/workflow/rules/shiver.smk +++ b/project/workflow/rules/shiver.smk @@ -1,22 +1,26 @@ rule shiver_initialization: + message: + "Initializing shiver..." input: reference_alignment=config["viral_reference_alignment"], adapters=config["viral_sequencing_adapters"], primers=config["viral_sequencing_primers"], shiver_config=config["shiver_config_file"] output: - initialization_directory=directory("results/shiver/{sample}/{sample}_shiver_init_dir") + initialization_dir=directory("results/shiver/{sample}/{sample}_shiver_init_dir") conda: "../envs/shiver.yaml" log: "logs/shiver/{sample}/{sample}_init.log" shell: - "shiver_init.sh {output.initialization_directory} {input.shiver_config} {input.reference_alignment} {input.adapters} {input.primers} 2> {log}" + "shiver_init.sh {output.initialization_dir} {input.shiver_config} {input.reference_alignment} {input.adapters} {input.primers} 2> {log}" rule shiver_align_contigs_to_reference: + message: + "Aligning contigs to reference..." input: - initialization_directory=rules.shiver_initialization.output.initialization_directory, + initialization_dir=rules.shiver_initialization.output.initialization_dir, contigs_file="results/IVA/{sample}/contigs.fasta", shiver_config=config["shiver_config_file"] output: @@ -31,15 +35,17 @@ rule shiver_align_contigs_to_reference: """ mkdir shiver_temp_{wildcards.sample} cd shiver_temp_{wildcards.sample} - shiver_align_contigs.sh ../{input.initialization_directory} ../{input.shiver_config} ../{input.contigs_file} ../results/shiver/{wildcards.sample}/{wildcards.sample} > ../{log} 2>&1 + shiver_align_contigs.sh ../{input.initialization_dir} ../{input.shiver_config} ../{input.contigs_file} ../results/shiver/{wildcards.sample}/{wildcards.sample} > ../{log} 2>&1 cd .. rm -r shiver_temp_{wildcards.sample} """ rule shiver_read_remapping: + message: + "Mapping..." input: - initialization_directory=rules.shiver_initialization.output.initialization_directory, + initialization_dir=rules.shiver_initialization.output.initialization_dir, contigs="results/IVA/{sample}/contigs.fasta", aligned_contigs_raw=rules.shiver_align_contigs_to_reference.output.aligned_contigs_raw, blast_hits=rules.shiver_align_contigs_to_reference.output.blast_hits, @@ -63,16 +69,18 @@ rule shiver_read_remapping: """ mkdir shiver_temp_{wildcards.sample} cd shiver_temp_{wildcards.sample} - shiver_map_reads.sh ../{input.initialization_directory} ../{input.shiver_config} ../{input.contigs} ../results/shiver/{wildcards.sample}/{wildcards.sample} ../{input.blast_hits} ../{input.aligned_contigs_raw} ../{input.forward_read} ../{input.reverse_read} > ../{log} 2>&1 + shiver_map_reads.sh ../{input.initialization_dir} ../{input.shiver_config} ../{input.contigs} ../results/shiver/{wildcards.sample}/{wildcards.sample} ../{input.blast_hits} ../{input.aligned_contigs_raw} ../{input.forward_read} ../{input.reverse_read} > ../{log} 2>&1 cd .. rm -r shiver_temp_{wildcards.sample} """ rule tidy_shiver_output: + message: + "Tidying up..." input: consensus_genome="results/shiver/{sample}/{sample}_remap_consensus_MinCov_15_30.fasta" output: - "results/shiver/{sample}/{sample}.fasta" + tidy_consensus_genome="results/shiver/{sample}/{sample}.fasta" conda: "../envs/shiver.yaml" log: diff --git a/project/workflow/snakefile b/project/workflow/snakefile index 638ea719f0ab864918894d09069769134b3bbf4b..a015f5dd715e2867cd9da73661addb8a12718226 100644 --- a/project/workflow/snakefile +++ b/project/workflow/snakefile @@ -3,27 +3,31 @@ configfile: "config/config.yaml" samples = pd.read_table(config["samples"], index_col="sample") + rule all: input: - #"krakendb" - #"results/kraken/ERR4181713_report.txt" - #expand("results/kraken/{sample}_trimmed.kraken", sample=samples.index) + "results/multiqc/multiqc.html", #expand("results/fastp/trimmed/{sample}.1.fastq", sample=samples.index), - #expand("results/fastp/trimmed/{sample}.2.fastq", sample=samples.index), - #expand("results/IVA/{sample}", sample=samples.index) + expand("results/fastp/report/{sample}.json", sample=samples.index), + #expand("results/fastqc/raw/{sample}_1_fastqc.html", sample=samples.index), + #expand("results/fastqc/raw/{sample}_2_fastqc.html", sample=samples.index), + #expand("results/fastqc/trimmed/{sample}_1_fastqc.html", sample=samples.index), + #expand("results/fastqc/trimmed/{sample}_2_fastqc.html", sample=samples.index), + + #"results/reference/index.bt2", + #expand("results/kraken/{sample}_trimmed.kraken", sample=samples.index), #expand("results/kraken/{sample}_trimmed.kreport2", sample=samples.index), #expand("results/kraken/{sample}_unmapped.kraken", sample=samples.index), - #expand("results/kraken/{sample}_unmapped.kreport2", sample=samples.index) - #expand("results/shiver/{sample}/{sample}_shiver_init_dir", sample=samples.index) - #expand("results/shiver/{sample}/{sample}_remap_consensus_MinCov_15_30.fasta", sample=samples.index) - #expand("results/fastqc/raw/{sample}_1_fastqc.html", sample=samples.index), - #expand("results/fastqc/raw/{sample}_2_fastqc.html", sample=samples.index), - #expand("results/trimmed_fastqc/{sample}_1_fastqc.html", sample=samples.index), - #expand("results/trimmed_fastqc/{sample}_2_fastqc.html", sample=samples.index) - #expand("results/shiver/{sample}/{sample}.fasta", sample=samples.index) - #expand("results/quast_shiver/{sample}", sample=samples.index) - "results/multiqc/multiqc.html" + #expand("results/kraken/{sample}_unmapped.kreport2", sample=samples.index), + + #expand("results/IVA/{sample}", sample=samples.index), + #expand("results/shiver/{sample}/{sample}_shiver_init_dir", sample=samples.index), + #expand("results/shiver/{sample}/{sample}_remap_consensus_MinCov_15_30.fasta", sample=samples.index), + + #expand("results/shiver/{sample}/{sample}.fasta", sample=samples.index), + #expand("results/quast_shiver/{sample}", sample=samples.index), + include: "rules/fastp.smk" include: "rules/bowtie2.smk" @@ -37,3 +41,4 @@ include: "rules/multiqc.smk" +