Skip to content
Snippets Groups Projects
Commit 0d3f9a4c authored by seehagec01's avatar seehagec01
Browse files

fix #4 + final workflow

parent d377dd55
Branches main
No related tags found
No related merge requests found
......@@ -2,10 +2,10 @@
samples: "resources/sample/sample.tsv"
human_reference: "resources/reference/human.fa"
index: "results/reference/index"
bowtieparams: "-q"
bowtie_params: "-q"
kraken_db: "/buffer/ag_bsc/pmsb_workflows_2022/kraken_standard_db/standard_db"
viral_reference_alignment: "resources/reference/SARS-CoV-2.fasta"
shiver_config_file: "config/shiver_config.sh"
viral_sequencing_adapters: "resources/adapters/SARS-CoV-2.fasta"
viral_sequencing_primers: "resources/primers/SARS-CoV-2.fasta"
fastp_params: "--adapter_sequence TCTCGGTC --adapter_sequence_r2 GCGGACTT"
\ No newline at end of file
fastp_params: "--adapter_sequence TCTCGGTC --adapter_sequence_r2 GCGGACTT"
rule IVA:
message:
"Genome Assembly..."
input:
fq1="results/unmapped_fastq/{sample}_1_unmapped.fastq",
fq2="results/unmapped_fastq/{sample}_2_unmapped.fastq"
output:
directory("results/IVA/{sample}/")
IVA_dir=directory("results/IVA/{sample}/")
log:
"logs/IVA/{sample}.log"
threads: 16
conda:
"../envs/yourenv.yaml"
shell:
"iva -f {input.fq1} -r {input.fq2} {output} --threads {threads} 2> {log}"
"iva -f {input.fq1} -r {input.fq2} {output.IVA_dir} --threads {threads} 2> {log}"
\ No newline at end of file
rule refindex:
message:
"Building mapping index..."
input:
prefix=config["human_reference"]
params:
prefix=config["index"]
output:
"results/reference/index.1.bt2"
"results/reference/index.bt2"
log:
"logs/bowtie2/refindex/index.1.log"
threads:4
conda:
"../envs/yourenv.yaml"
shell:
"v1.3.2/bio/bowtie2/build"
"bowtie2-build {input} {params.prefix} > {output}"
rule map:
message:
"Mapping reads to human reference..."
input:
idx=multiext(
"results/reference/index",
......@@ -27,7 +31,7 @@ rule map:
),
sample=["results/fastp/trimmed/{sample}.1.fastq", "results/fastp/trimmed/{sample}.2.fastq"]
params:
extra=config["bowtieparams"]
extra=config["bowtie_params"]
output:
"results/bam/{sample}.bam"
threads:4
......
rule fastp:
message:
"Quality controlling raw data..."
input:
sample=["Data/SARS-Cov-2/{sample}_1.fastq.gz", "Data/SARS-Cov-2/{sample}_2.fastq.gz"]
output:
trimmed=["results/fastp/trimmed/{sample}.1.fastq", "results/fastp/trimmed/{sample}.2.fastq"],
html="report/fastp/{sample}.html",
json="report/fastp/{sample}.json"
html="results/fastp/report/{sample}.html",
json="results/fastp/report/{sample}.json"
log:
"logs/fastp/{sample}.log"
conda:
"../envs/yourenv.yaml"
params:
extra=config["fastp_params]
extra=config["fastp_params"]
threads: 4
wrapper:
"v1.3.2/bio/fastp"
\ No newline at end of file
rule rawfastqc1:
input:
rf1 = "Data/SARS-Cov-2/{sample}_1.fastq.gz"
output:
html="results/fastqc/raw/{sample}_1_fastqc.html",
zip="results/fastqc/raw/{sample}_1_fastqc.zip"
conda:
"../envs/yourenv.yaml"
log:
"logs/fastqc/{sample}.log"
wrapper:
"v1.3.1/bio/fastqc"
rule rawfastqc2:
input:
rf2 = "Data/SARS-Cov-2/{sample}_2.fastq.gz"
output:
html="results/fastqc/raw/{sample}_2_fastqc.html",
zip="results/fastqc/raw/{sample}_2_fastqc.zip"
conda:
"../envs/yourenv.yaml"
log:
"logs/fastqc/{sample}.log"
wrapper:
"v1.3.1/bio/fastqc"
rule trimmedfastqc1:
input:
sample=["results/fastp/trimmed/{sample}.1.fastq","results/fastp/trimmed/{sample}.2.fastq"]
output:
html="results/fastqc/trimmed/{sample}_1_fastqc.html",
zip="results/fastqc/trimmed/{sample}_1_fastqc.zip"
conda:
"../envs/yourenv.yaml"
log:
"logs/fastqc/trimmed_{sample}.log"
wrapper:
"v1.3.1/bio/fastqc"
rule trimmedfastqc2:
input:
sample=["results/fastp/trimmed/{sample}.1.fastq","results/fastp/trimmed/{sample}.2.fastq"]
output:
html="results/fastqc/trimmed/{sample}_2_fastqc.html",
zip="results/fastqc/trimmed/{sample}_2_fastqc.zip"
conda:
"../envs/yourenv.yaml"
log:
"logs/fastqc/trimmed_{sample}.log"
wrapper:
"v1.3.1/bio/fastqc"
rule build:
message:
"Building Kraken database..."
output:
"krakendb"
database="krakendb"
threads: 64
conda:
"../envs/yourenv.yaml"
......@@ -18,8 +20,10 @@ rule build:
"""
rule report_trimmedreads:
message:
"Creating Kraken report using trimmed reads..."
input:
db=config["kraken_db"],
database=config["kraken_db"],
sample=["results/fastp/trimmed/{sample}.1.fastq", "results/fastp/trimmed/{sample}.2.fastq"]
output:
output="results/kraken/{sample}_trimmed.kraken",
......@@ -30,11 +34,13 @@ rule report_trimmedreads:
log:
"logs/kraken/report/{sample}_trimmed.log"
shell:
"kraken2 --threads {threads} --db {input.db} --paired {input.sample} --report {output.report} > {output.output} 2> {log}"
"kraken2 --threads {threads} --db {input.database} --paired {input.sample} --report {output.report} > {output.output} 2> {log}"
rule report_unmappedreads:
message:
"Creating Kraken report using unmapped reads..."
input:
db="/buffer/ag_bsc/pmsb_workflows_2022/kraken_standard_db/standard_db",
database="/buffer/ag_bsc/pmsb_workflows_2022/kraken_standard_db/standard_db",
sample=["results/unmapped_fastq/{sample}_1_unmapped.fastq", "results/unmapped_fastq/{sample}_2_unmapped.fastq"]
output:
output="results/kraken/{sample}_unmapped.kraken",
......@@ -45,4 +51,4 @@ rule report_unmappedreads:
log:
"logs/kraken/report/{sample}_unmapped.log"
shell:
"kraken2 --threads {threads} --db {input.db} --paired {input.sample} --report {output.report} > {output.output} 2> {log}"
\ No newline at end of file
"kraken2 --threads {threads} --db {input.database} --paired {input.sample} --report {output.report} > {output.output} 2> {log}"
\ No newline at end of file
rule multiqc:
message:
"Creating MultiQC report..."
input:
"results/"
results="results/",
fastp="results/fastp/report/"
output:
"results/multiqc/multiqc.html"
report="results/multiqc/multiqc.html"
conda:
"../envs/yourenv.yaml"
shell:
"""
multiqc -n multiqc.html {input}
mv multiqc.html {output}
multiqc -n multiqc.html {input.results} {input.fastp}
mv multiqc.html {output.report}
mv multiqc_data/ results/
"""
\ No newline at end of file
rule quast_IVA:
message:
"Creating QUAST report for IVA contigs..."
input:
"results/IVA/{sample}/contigs.fasta"
contigs="results/IVA/{sample}/contigs.fasta"
output:
directory("results/quast_IVA/{sample}")
quast_dir=directory("results/quast_IVA/{sample}")
conda:
"../envs/quast.yaml"
threads: 4
log:
"logs/quast_IVA/{sample}.log"
shell:
"quast -o {output} -t {threads} {input} 2> {log}"
"quast -o {output.quast_dir} -t {threads} {input.contigs} 2> {log}"
rule quast_shiver:
message:
"Creating QUAST report for shiver consensus genome..."
input:
consensus_genome="results/shiver/{sample}/{sample}.fasta",
quast_ref_genome=config["viral_reference_alignment"]
......
rule fastq:
message:
"Extracting unmapped reads..."
input:
"results/bam/{sample}.bam"
bam="results/bam/{sample}.bam"
output:
fq1="results/unmapped_fastq/{sample}_1_unmapped.fastq",
fq2="results/unmapped_fastq/{sample}_2_unmapped.fastq"
......@@ -11,7 +13,7 @@ rule fastq:
"../envs/yourenv.yaml"
shell:
"""
samtools view --threads={threads} -b -f4 {input} > results/bam/{wildcards.sample}_unmapped.bam 2> {log}
samtools view --threads={threads} -b -f4 {input.bam} > results/bam/{wildcards.sample}_unmapped.bam 2> {log}
samtools sort -n results/bam/{wildcards.sample}_unmapped.bam -o results/bam/{wildcards.sample}_sorted.bam 2>> {log}
bedtools bamtofastq -i results/bam/{wildcards.sample}_sorted.bam -fq {output.fq1} -fq2 {output.fq2} 2>> {log}
"""
......
rule shiver_initialization:
message:
"Initializing shiver..."
input:
reference_alignment=config["viral_reference_alignment"],
adapters=config["viral_sequencing_adapters"],
primers=config["viral_sequencing_primers"],
shiver_config=config["shiver_config_file"]
output:
initialization_directory=directory("results/shiver/{sample}/{sample}_shiver_init_dir")
initialization_dir=directory("results/shiver/{sample}/{sample}_shiver_init_dir")
conda:
"../envs/shiver.yaml"
log:
"logs/shiver/{sample}/{sample}_init.log"
shell:
"shiver_init.sh {output.initialization_directory} {input.shiver_config} {input.reference_alignment} {input.adapters} {input.primers} 2> {log}"
"shiver_init.sh {output.initialization_dir} {input.shiver_config} {input.reference_alignment} {input.adapters} {input.primers} 2> {log}"
rule shiver_align_contigs_to_reference:
message:
"Aligning contigs to reference..."
input:
initialization_directory=rules.shiver_initialization.output.initialization_directory,
initialization_dir=rules.shiver_initialization.output.initialization_dir,
contigs_file="results/IVA/{sample}/contigs.fasta",
shiver_config=config["shiver_config_file"]
output:
......@@ -31,15 +35,17 @@ rule shiver_align_contigs_to_reference:
"""
mkdir shiver_temp_{wildcards.sample}
cd shiver_temp_{wildcards.sample}
shiver_align_contigs.sh ../{input.initialization_directory} ../{input.shiver_config} ../{input.contigs_file} ../results/shiver/{wildcards.sample}/{wildcards.sample} > ../{log} 2>&1
shiver_align_contigs.sh ../{input.initialization_dir} ../{input.shiver_config} ../{input.contigs_file} ../results/shiver/{wildcards.sample}/{wildcards.sample} > ../{log} 2>&1
cd ..
rm -r shiver_temp_{wildcards.sample}
"""
rule shiver_read_remapping:
message:
"Mapping..."
input:
initialization_directory=rules.shiver_initialization.output.initialization_directory,
initialization_dir=rules.shiver_initialization.output.initialization_dir,
contigs="results/IVA/{sample}/contigs.fasta",
aligned_contigs_raw=rules.shiver_align_contigs_to_reference.output.aligned_contigs_raw,
blast_hits=rules.shiver_align_contigs_to_reference.output.blast_hits,
......@@ -63,16 +69,18 @@ rule shiver_read_remapping:
"""
mkdir shiver_temp_{wildcards.sample}
cd shiver_temp_{wildcards.sample}
shiver_map_reads.sh ../{input.initialization_directory} ../{input.shiver_config} ../{input.contigs} ../results/shiver/{wildcards.sample}/{wildcards.sample} ../{input.blast_hits} ../{input.aligned_contigs_raw} ../{input.forward_read} ../{input.reverse_read} > ../{log} 2>&1
shiver_map_reads.sh ../{input.initialization_dir} ../{input.shiver_config} ../{input.contigs} ../results/shiver/{wildcards.sample}/{wildcards.sample} ../{input.blast_hits} ../{input.aligned_contigs_raw} ../{input.forward_read} ../{input.reverse_read} > ../{log} 2>&1
cd ..
rm -r shiver_temp_{wildcards.sample}
"""
rule tidy_shiver_output:
message:
"Tidying up..."
input:
consensus_genome="results/shiver/{sample}/{sample}_remap_consensus_MinCov_15_30.fasta"
output:
"results/shiver/{sample}/{sample}.fasta"
tidy_consensus_genome="results/shiver/{sample}/{sample}.fasta"
conda:
"../envs/shiver.yaml"
log:
......
......@@ -3,27 +3,31 @@ configfile: "config/config.yaml"
samples = pd.read_table(config["samples"], index_col="sample")
rule all:
input:
#"krakendb"
#"results/kraken/ERR4181713_report.txt"
#expand("results/kraken/{sample}_trimmed.kraken", sample=samples.index)
"results/multiqc/multiqc.html",
#expand("results/fastp/trimmed/{sample}.1.fastq", sample=samples.index),
#expand("results/fastp/trimmed/{sample}.2.fastq", sample=samples.index),
#expand("results/IVA/{sample}", sample=samples.index)
expand("results/fastp/report/{sample}.json", sample=samples.index),
#expand("results/fastqc/raw/{sample}_1_fastqc.html", sample=samples.index),
#expand("results/fastqc/raw/{sample}_2_fastqc.html", sample=samples.index),
#expand("results/fastqc/trimmed/{sample}_1_fastqc.html", sample=samples.index),
#expand("results/fastqc/trimmed/{sample}_2_fastqc.html", sample=samples.index),
#"results/reference/index.bt2",
#expand("results/kraken/{sample}_trimmed.kraken", sample=samples.index),
#expand("results/kraken/{sample}_trimmed.kreport2", sample=samples.index),
#expand("results/kraken/{sample}_unmapped.kraken", sample=samples.index),
#expand("results/kraken/{sample}_unmapped.kreport2", sample=samples.index)
#expand("results/shiver/{sample}/{sample}_shiver_init_dir", sample=samples.index)
#expand("results/shiver/{sample}/{sample}_remap_consensus_MinCov_15_30.fasta", sample=samples.index)
#expand("results/fastqc/raw/{sample}_1_fastqc.html", sample=samples.index),
#expand("results/fastqc/raw/{sample}_2_fastqc.html", sample=samples.index),
#expand("results/trimmed_fastqc/{sample}_1_fastqc.html", sample=samples.index),
#expand("results/trimmed_fastqc/{sample}_2_fastqc.html", sample=samples.index)
#expand("results/shiver/{sample}/{sample}.fasta", sample=samples.index)
#expand("results/quast_shiver/{sample}", sample=samples.index)
"results/multiqc/multiqc.html"
#expand("results/kraken/{sample}_unmapped.kreport2", sample=samples.index),
#expand("results/IVA/{sample}", sample=samples.index),
#expand("results/shiver/{sample}/{sample}_shiver_init_dir", sample=samples.index),
#expand("results/shiver/{sample}/{sample}_remap_consensus_MinCov_15_30.fasta", sample=samples.index),
#expand("results/shiver/{sample}/{sample}.fasta", sample=samples.index),
#expand("results/quast_shiver/{sample}", sample=samples.index),
include: "rules/fastp.smk"
include: "rules/bowtie2.smk"
......@@ -37,3 +41,4 @@ include: "rules/multiqc.smk"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment