diff --git a/tutorial/config/config.yaml b/tutorial/config/config.yaml index 8442899b0b7895b63203b079ad3197fdf8762fc6..37cdf79d0ec26509e431967e4bd2f05adc6b324b 100644 --- a/tutorial/config/config.yaml +++ b/tutorial/config/config.yaml @@ -1,7 +1,7 @@ --- index: "results/reference/index" -samples: "resources/samples.tsv" +samples: "resources/sample/samples.tsv" bowtieparams: "-q" @@ -10,3 +10,5 @@ bowtie: N: 0 # Sets the number of mismatches to allowed in a seed alignment during multiseed alignment. L: 22 # Sets the length of the seed substrings to align during multiseed alignment. extra: "--ignore-quals --end-to-end" # users can put here all other parameters or simply leave empty + +adapter: "resources/adapter/TruSeq3-PE.fa" diff --git a/tutorial/resources/adapter/TruSeq3-PE.fa b/tutorial/resources/adapter/TruSeq3-PE.fa new file mode 100644 index 0000000000000000000000000000000000000000..f38a9d55eae838be2aca30c9c05166d302d8a2d4 --- /dev/null +++ b/tutorial/resources/adapter/TruSeq3-PE.fa @@ -0,0 +1,4 @@ +>PrefixPE/1 +TACACTCTTTCCCTACACGACGCTCTTCCGATCT +>PrefixPE/2 +GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT diff --git a/tutorial/resources/sample/samples.tsv b/tutorial/resources/sample/samples.tsv new file mode 100644 index 0000000000000000000000000000000000000000..fbcea5fb20568588675e95bd3f738da350915ca2 --- /dev/null +++ b/tutorial/resources/sample/samples.tsv @@ -0,0 +1,7 @@ +sample fq1 fq2 +ERR024604 results/fastq/ERR024604_1.fastq.gz results/fastq/ERR024604_2.fastq.gz +ERR024605 results/fastq/ERR024605_1.fastq.gz results/fastq/ERR024605_2.fastq.gz +ERR024606 results/fastq/ERR024606_1.fastq.gz results/fastq/ERR024606_2.fastq.gz +ERR024607 results/fastq/ERR024607_1.fastq.gz results/fastq/ERR024607_2.fastq.gz +ERR024608 results/fastq/ERR024608_1.fastq.gz results/fastq/ERR024608_2.fastq.gz +ERR024609 results/fastq/ERR024609_1.fastq.gz results/fastq/ERR024609_2.fastq.gz diff --git a/tutorial/workflow/Snakefile b/tutorial/workflow/Snakefile index 
70a6239e369b08f2010a4b90fbf7d6014259ebfc..d20a56c37835cca8a707fd0e33155aa5577a31ee 100644 --- a/tutorial/workflow/Snakefile +++ b/tutorial/workflow/Snakefile @@ -6,6 +6,10 @@ samples = pd.read_table(config["samples"], index_col="sample") rule all: input: + expand("results/trimmed/{sample}_forward_paired.fq.gz", sample=samples.index), + expand("results/trimmed/{sample}_forward_unpaired.fq.gz", sample=samples.index), + expand("results/trimmed/{sample}_reverse_paired.fq.gz", sample=samples.index), + expand("results/trimmed/{sample}_reverse_unpaired.fq.gz", sample=samples.index), "results/aggregate/mappedcounts.csv", expand("results/stats/{sample}.stats.txt", sample=samples.index), expand("results/fastqc/{sample}_1_fastqc.html", sample=samples.index), @@ -20,3 +24,5 @@ include:"rules/bowtie.smk" include:"rules/samtools.smk" include:"rules/aggregate.smk" + +include:"rules/trimming.smk" diff --git a/tutorial/workflow/envs/yourenv.yaml b/tutorial/workflow/envs/yourenv.yaml index fb2f959cb9a0ddadaaac60f7241d2a7adaeae5cc..bba82dd5d1878b53ebb7a151af0d144de6863369 100644 --- a/tutorial/workflow/envs/yourenv.yaml +++ b/tutorial/workflow/envs/yourenv.yaml @@ -3,6 +3,7 @@ channels: - bioconda - defaults dependencies: + - pandas=1.3.5 - bowtie2=2.3.5.1=py37h2dec4b4_0 - c-ares=1.18.1=hca72f7f_0 - ca-certificates=2022.2.1=hecd8cb5_0 @@ -22,10 +23,10 @@ dependencies: - pip=21.2.2=py37hecd8cb5_0 - python=3.7.11=h88f2d9e_0 - readline=8.1.2=hca72f7f_1 - - samtools=1.4.1=0 + - samtools=1.3.1=0 - setuptools=58.0.4=py37hecd8cb5_0 - sqlite=3.37.2=h707629a_0 - - tbb=2021.5.0=haf03e11_0 + - tbb=2020.2 - tk=8.6.11=h7bc2e8c_0 - wheel=0.37.1=pyhd3eb1b0_0 - xz=5.2.5=h1de35cc_0 diff --git a/tutorial/workflow/rules/bowtie.smk b/tutorial/workflow/rules/bowtie.smk index b60c52ed2bf2fe3cab2504a0ed595d0d524158b1..de9b35524881001c0d36c52e59ca0a41f831686b 100644 --- a/tutorial/workflow/rules/bowtie.smk +++ b/tutorial/workflow/rules/bowtie.smk @@ -16,8 +16,8 @@ rule refindex: rule map: input: 
"results/reference/index.bt2", - r1 = lambda wildcards: samples.at[wildcards.sample,'fq1'] if wildcards.sample in samples.index else ' ', - r2 = lambda wildcards: samples.at[wildcards.sample,'fq2'] if wildcards.sample in samples.index else ' ' + "results/trimmed/{sample}_forward_paired.fq.gz", + "results/trimmed/{sample}_reverse_paired.fq.gz" params: prefix=config["index"], bowtieparams=config["bowtieparams"] diff --git a/tutorial/workflow/rules/samtools.smk b/tutorial/workflow/rules/samtools.smk index dc63db98607ed1018f97b8135d28edeb7c2fbeee..5c1d0109f3ee42dc8d5687510d977328d9c9bccb 100644 --- a/tutorial/workflow/rules/samtools.smk +++ b/tutorial/workflow/rules/samtools.smk @@ -42,8 +42,7 @@ rule stats: "results/bam_sorted/{sample}.bam.bai" output: "results/stats/{sample}.stats.txt" - threads:4 conda: "../envs/yourenv.yaml" shell: - "samtools idxstats --threads={threads} {input[0]} > {output}" + "samtools idxstats {input[0]} > {output}" diff --git a/tutorial/workflow/rules/trimming.smk b/tutorial/workflow/rules/trimming.smk index 9ee44de1d9f7197834b4392ccbdbfe66257e2d7c..f2fb3f1006c43e1ada14f62b433a37f2118cdf9b 100644 --- a/tutorial/workflow/rules/trimming.smk +++ b/tutorial/workflow/rules/trimming.smk @@ -2,7 +2,16 @@ rule trimming: input: rt1 = lambda wildcards: samples.at[wildcards.sample,'fq1'] if wildcards.sample in samples.index else ' ', rt2 = lambda wildcards: samples.at[wildcards.sample,'fq2'] if wildcards.sample in samples.index else ' ' + params: + adapter=config["adapter"] output: - "results/trimmed/{sample}.fq.gz" + "results/trimmed/{sample}_forward_paired.fq.gz", + "results/trimmed/{sample}_forward_unpaired.fq.gz", + "results/trimmed/{sample}_reverse_paired.fq.gz", + "results/trimmed/{sample}_reverse_unpaired.fq.gz" + conda: + "../envs/yourenv.yaml" + log: + "workflow/report/trimming/{sample}.log" shell: - "java -jar trimmomatic-0.39.jar PE {input[0]} {input[1]} {output} ILLUMINACLIP:TruSeq3-PE.fa:2:30:10:2:True LEADING:3 TRAILING:3 MINLEN:36" + 
"trimmomatic PE {input[0]} {input[1]} {output} ILLUMINACLIP:{params.adapter}:2:30:10:2:True LEADING:3 TRAILING:3 MINLEN:36 2> {log}"  # send Trimmomatic's stderr report to the log file this rule declares; {output} expands to the four outputs in declared order