diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..9f0666a725ddf766c61ec264e07125e59af3793d Binary files /dev/null and b/.DS_Store differ diff --git a/config/config.yaml b/config/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8442899b0b7895b63203b079ad3197fdf8762fc6 --- /dev/null +++ b/config/config.yaml @@ -0,0 +1,12 @@ +--- +index: "results/reference/index" + +samples: "resources/samples.tsv" + +bowtieparams: "-q" + +# Something like this would be nicer: +bowtie: + N: 0 # Sets the number of mismatches allowed in a seed alignment during multiseed alignment. + L: 22 # Sets the length of the seed substrings to align during multiseed alignment. + extra: "--ignore-quals --end-to-end" # users can put any other parameters here, or leave it empty diff --git a/workflow/.DS_Store b/workflow/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..294de6bec7faff2e889ed62c1b20f8f1aa172492 Binary files /dev/null and b/workflow/.DS_Store differ diff --git a/workflow/.snakemake/log/2022-03-15T135851.892224.snakemake.log b/workflow/.snakemake/log/2022-03-15T135851.892224.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..5043d4d3930c1c91ebee23c03815fd3a1f0ffd7e --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T135851.892224.snakemake.log @@ -0,0 +1,4 @@ +IndentationError in line 6 of <tokenize>: +unindent does not match any outer indentation level (<tokenize>, line 6) + File "/Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/Snakefile", line 11, in <module> + File "/opt/homebrew/Cellar/python@3.10/3.10.2/Frameworks/Python.framework/Versions/3.10/lib/python3.10/tokenize.py", line 514, in _tokenize diff --git a/workflow/.snakemake/log/2022-03-15T140112.429011.snakemake.log b/workflow/.snakemake/log/2022-03-15T140112.429011.snakemake.log new file mode 100644 index 
0000000000000000000000000000000000000000..5043d4d3930c1c91ebee23c03815fd3a1f0ffd7e --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T140112.429011.snakemake.log @@ -0,0 +1,4 @@ +IndentationError in line 6 of <tokenize>: +unindent does not match any outer indentation level (<tokenize>, line 6) + File "/Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/Snakefile", line 11, in <module> + File "/opt/homebrew/Cellar/python@3.10/3.10.2/Frameworks/Python.framework/Versions/3.10/lib/python3.10/tokenize.py", line 514, in _tokenize diff --git a/workflow/.snakemake/log/2022-03-15T140222.370755.snakemake.log b/workflow/.snakemake/log/2022-03-15T140222.370755.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..5043d4d3930c1c91ebee23c03815fd3a1f0ffd7e --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T140222.370755.snakemake.log @@ -0,0 +1,4 @@ +IndentationError in line 6 of <tokenize>: +unindent does not match any outer indentation level (<tokenize>, line 6) + File "/Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/Snakefile", line 11, in <module> + File "/opt/homebrew/Cellar/python@3.10/3.10.2/Frameworks/Python.framework/Versions/3.10/lib/python3.10/tokenize.py", line 514, in _tokenize diff --git a/workflow/.snakemake/log/2022-03-15T140225.807213.snakemake.log b/workflow/.snakemake/log/2022-03-15T140225.807213.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..5043d4d3930c1c91ebee23c03815fd3a1f0ffd7e --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T140225.807213.snakemake.log @@ -0,0 +1,4 @@ +IndentationError in line 6 of <tokenize>: +unindent does not match any outer indentation level (<tokenize>, line 6) + File "/Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/Snakefile", line 11, in <module> + File "/opt/homebrew/Cellar/python@3.10/3.10.2/Frameworks/Python.framework/Versions/3.10/lib/python3.10/tokenize.py", line 514, in _tokenize diff --git 
a/workflow/.snakemake/log/2022-03-15T140318.383706.snakemake.log b/workflow/.snakemake/log/2022-03-15T140318.383706.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..b0300ae48e3acd80cfa9803ae73862fd360cffd1 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T140318.383706.snakemake.log @@ -0,0 +1,4 @@ +IndentationError in line 6 of <tokenize>: +unindent does not match any outer indentation level (<tokenize>, line 6) + File "/Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/Snakefile", line 10, in <module> + File "/opt/homebrew/Cellar/python@3.10/3.10.2/Frameworks/Python.framework/Versions/3.10/lib/python3.10/tokenize.py", line 514, in _tokenize diff --git a/workflow/.snakemake/log/2022-03-15T140349.569585.snakemake.log b/workflow/.snakemake/log/2022-03-15T140349.569585.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..5043d4d3930c1c91ebee23c03815fd3a1f0ffd7e --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T140349.569585.snakemake.log @@ -0,0 +1,4 @@ +IndentationError in line 6 of <tokenize>: +unindent does not match any outer indentation level (<tokenize>, line 6) + File "/Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/Snakefile", line 11, in <module> + File "/opt/homebrew/Cellar/python@3.10/3.10.2/Frameworks/Python.framework/Versions/3.10/lib/python3.10/tokenize.py", line 514, in _tokenize diff --git a/workflow/.snakemake/log/2022-03-15T140610.870264.snakemake.log b/workflow/.snakemake/log/2022-03-15T140610.870264.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..5043d4d3930c1c91ebee23c03815fd3a1f0ffd7e --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T140610.870264.snakemake.log @@ -0,0 +1,4 @@ +IndentationError in line 6 of <tokenize>: +unindent does not match any outer indentation level (<tokenize>, line 6) + File "/Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/Snakefile", line 11, in <module> + File 
"/opt/homebrew/Cellar/python@3.10/3.10.2/Frameworks/Python.framework/Versions/3.10/lib/python3.10/tokenize.py", line 514, in _tokenize diff --git a/workflow/.snakemake/log/2022-03-15T140703.847723.snakemake.log b/workflow/.snakemake/log/2022-03-15T140703.847723.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..ebed290053861282c3c6b4471189f488ddb289c7 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T140703.847723.snakemake.log @@ -0,0 +1,7 @@ +Building DAG of jobs... +MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024604_tiny.sam + wildcards: sample=ERR024604_tiny + affected files: + results/fastq/ERR024604_tiny.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T140938.788774.snakemake.log b/workflow/.snakemake/log/2022-03-15T140938.788774.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..af61bfe6ac18521a739f26348c3a5f6db2a62f28 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T140938.788774.snakemake.log @@ -0,0 +1,4 @@ +Building DAG of jobs... +WildcardError in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Wildcards in input files cannot be determined from output files: +'number' diff --git a/workflow/.snakemake/log/2022-03-15T141038.010182.snakemake.log b/workflow/.snakemake/log/2022-03-15T141038.010182.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..599925ce230ac762a883cfe7711e8fd5ed446dd8 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T141038.010182.snakemake.log @@ -0,0 +1,7 @@ +Building DAG of jobs... 
+MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024608_tiny.sam + wildcards: sample=ERR024608_tiny + affected files: + results/fastq/ERR024608_tiny_1.fastq.gzresults/fastq/ERR024608_tiny_2.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T141257.023361.snakemake.log b/workflow/.snakemake/log/2022-03-15T141257.023361.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..599925ce230ac762a883cfe7711e8fd5ed446dd8 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T141257.023361.snakemake.log @@ -0,0 +1,7 @@ +Building DAG of jobs... +MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024608_tiny.sam + wildcards: sample=ERR024608_tiny + affected files: + results/fastq/ERR024608_tiny_1.fastq.gzresults/fastq/ERR024608_tiny_2.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T141415.922744.snakemake.log b/workflow/.snakemake/log/2022-03-15T141415.922744.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..37ae37e33fb8b3c8cef147ffa58a7a08f75ce8c3 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T141415.922744.snakemake.log @@ -0,0 +1,7 @@ +Building DAG of jobs... 
+MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024604_tiny.sam + wildcards: sample=ERR024604_tiny + affected files: + results/fastq/ERR024604_tiny_1.fastq.gzresults/fastq/ERR024604_tiny_2.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T141432.576962.snakemake.log b/workflow/.snakemake/log/2022-03-15T141432.576962.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..e12ba89a9f32724651687a1101cd777287b4546c --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T141432.576962.snakemake.log @@ -0,0 +1,7 @@ +Building DAG of jobs... +MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024606_tiny.sam + wildcards: sample=ERR024606_tiny + affected files: + results/fastq/ERR024606_tiny_1.fastq.gzresults/fastq/ERR024606_tiny_2.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T141436.384821.snakemake.log b/workflow/.snakemake/log/2022-03-15T141436.384821.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..f39f7d9e9cc1cf5a523486889fcefb5637070537 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T141436.384821.snakemake.log @@ -0,0 +1,7 @@ +Building DAG of jobs... 
+MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024605_tiny.sam + wildcards: sample=ERR024605_tiny + affected files: + results/fastq/ERR024605_tiny_1.fastq.gzresults/fastq/ERR024605_tiny_2.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T141447.281345.snakemake.log b/workflow/.snakemake/log/2022-03-15T141447.281345.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..599925ce230ac762a883cfe7711e8fd5ed446dd8 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T141447.281345.snakemake.log @@ -0,0 +1,7 @@ +Building DAG of jobs... +MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024608_tiny.sam + wildcards: sample=ERR024608_tiny + affected files: + results/fastq/ERR024608_tiny_1.fastq.gzresults/fastq/ERR024608_tiny_2.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T141505.214014.snakemake.log b/workflow/.snakemake/log/2022-03-15T141505.214014.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..159fab4ff2037b42d35b0dcf7ad49cc5b22a2ed1 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T141505.214014.snakemake.log @@ -0,0 +1,8 @@ +Building DAG of jobs... 
+MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024606_tiny.sam + wildcards: sample=ERR024606_tiny + affected files: + results/fastq/ERR024606_tiny_2.fastq.gz + results/fastq/ERR024606_tiny_1.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T141509.059492.snakemake.log b/workflow/.snakemake/log/2022-03-15T141509.059492.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..0e148e992d3a9b70a0d22b6e2ce0a8daf4c420d4 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T141509.059492.snakemake.log @@ -0,0 +1,8 @@ +Building DAG of jobs... +MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024605_tiny.sam + wildcards: sample=ERR024605_tiny + affected files: + results/fastq/ERR024605_tiny_2.fastq.gz + results/fastq/ERR024605_tiny_1.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T141559.573733.snakemake.log b/workflow/.snakemake/log/2022-03-15T141559.573733.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..6bbae34f1a5a93b901da4e226f890efca0d9f6f3 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T141559.573733.snakemake.log @@ -0,0 +1,8 @@ +Building DAG of jobs... 
+MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024604_tiny.sam + wildcards: sample=ERR024604_tiny + affected files: + results/fastq/ERR024604_tiny_2.fastq.gz + results/fastq/ERR024604_tiny_1.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T141603.058242.snakemake.log b/workflow/.snakemake/log/2022-03-15T141603.058242.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..4b3d34f99c0c186ebf736f2d3369f96c1f2318f0 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T141603.058242.snakemake.log @@ -0,0 +1,8 @@ +Building DAG of jobs... +MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024609_tiny.sam + wildcards: sample=ERR024609_tiny + affected files: + results/fastq/ERR024609_tiny_2.fastq.gz + results/fastq/ERR024609_tiny_1.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T141609.906141.snakemake.log b/workflow/.snakemake/log/2022-03-15T141609.906141.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..18abd4938f452118c5178bfc861c7fda3a12af19 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T141609.906141.snakemake.log @@ -0,0 +1,8 @@ +Building DAG of jobs... 
+MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024605_tiny.sam + wildcards: sample=ERR024605_tiny + affected files: + results/fastq/ERR024605_tiny_1.fastq.gz + results/fastq/ERR024605_tiny_2.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T141632.425650.snakemake.log b/workflow/.snakemake/log/2022-03-15T141632.425650.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..18abd4938f452118c5178bfc861c7fda3a12af19 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T141632.425650.snakemake.log @@ -0,0 +1,8 @@ +Building DAG of jobs... +MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024605_tiny.sam + wildcards: sample=ERR024605_tiny + affected files: + results/fastq/ERR024605_tiny_1.fastq.gz + results/fastq/ERR024605_tiny_2.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T141842.449333.snakemake.log b/workflow/.snakemake/log/2022-03-15T141842.449333.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..3c0e49282a5552f90e31aa71f812d88aa27b3534 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T141842.449333.snakemake.log @@ -0,0 +1,8 @@ +Building DAG of jobs... 
+MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024609_tiny.sam + wildcards: sample=ERR024609_tiny + affected files: + results/fastq/ERR024609_tiny_1.fastq.gz + results/fastq/ERR024609_tiny_2.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T142135.708957.snakemake.log b/workflow/.snakemake/log/2022-03-15T142135.708957.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..6bbae34f1a5a93b901da4e226f890efca0d9f6f3 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T142135.708957.snakemake.log @@ -0,0 +1,8 @@ +Building DAG of jobs... +MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024604_tiny.sam + wildcards: sample=ERR024604_tiny + affected files: + results/fastq/ERR024604_tiny_2.fastq.gz + results/fastq/ERR024604_tiny_1.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T142139.848286.snakemake.log b/workflow/.snakemake/log/2022-03-15T142139.848286.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..f8967863c1a468e383785481d279a7eb52742cf7 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T142139.848286.snakemake.log @@ -0,0 +1,8 @@ +Building DAG of jobs... 
+MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024606_tiny.sam + wildcards: sample=ERR024606_tiny + affected files: + results/fastq/ERR024606_tiny_1.fastq.gz + results/fastq/ERR024606_tiny_2.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T142146.756227.snakemake.log b/workflow/.snakemake/log/2022-03-15T142146.756227.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..159fab4ff2037b42d35b0dcf7ad49cc5b22a2ed1 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T142146.756227.snakemake.log @@ -0,0 +1,8 @@ +Building DAG of jobs... +MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk: +Missing input files for rule map: + output: results/sam/ERR024606_tiny.sam + wildcards: sample=ERR024606_tiny + affected files: + results/fastq/ERR024606_tiny_2.fastq.gz + results/fastq/ERR024606_tiny_1.fastq.gz diff --git a/workflow/.snakemake/log/2022-03-15T143128.835585.snakemake.log b/workflow/.snakemake/log/2022-03-15T143128.835585.snakemake.log new file mode 100644 index 0000000000000000000000000000000000000000..0e452c8b7bc4ae042c88c8108ec897b60733a813 --- /dev/null +++ b/workflow/.snakemake/log/2022-03-15T143128.835585.snakemake.log @@ -0,0 +1,6 @@ +Building DAG of jobs... 
+MissingInputException in line 3 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk:
+Missing input files for rule refindex:
+    output: results/reference/index
+    affected files:
+        results/reference/reference.fa
diff --git a/workflow/.snakemake/log/2022-03-15T143316.866263.snakemake.log b/workflow/.snakemake/log/2022-03-15T143316.866263.snakemake.log
new file mode 100644
index 0000000000000000000000000000000000000000..61d78e4f854772d6e927140e84e9f0b8ea545b7d
--- /dev/null
+++ b/workflow/.snakemake/log/2022-03-15T143316.866263.snakemake.log
@@ -0,0 +1,6 @@
+Building DAG of jobs...
+MissingInputException in line 1 of /Users/charlotteseehagen/Documents/Uni/swp-workflows/workflow/rules/bowtie.smk:
+Missing input files for rule refindex:
+    output: results/reference/index
+    affected files:
+        results/reference/reference.fa
diff --git a/workflow/Snakefile b/workflow/Snakefile
new file mode 100644
index 0000000000000000000000000000000000000000..b2f07fd629fdc54666a22d7eaec6d6297a3a6a37
--- /dev/null
+++ b/workflow/Snakefile
@@ -0,0 +1,25 @@
+import pandas as pd
+
+
+configfile: "config/config.yaml"
+
+
+# Sample sheet, indexed by its "sample" column.
+samples = pd.read_table(config["samples"], index_col="sample")
+
+
+# Default target: idxstats output plus the FastQC html/zip reports of both mates.
+rule all:
+    input:
+        expand("results/stats/{sample}.stats.txt", sample=samples.index),
+        expand(
+            "results/fastqc/{sample}_{mate}_fastqc.{ext}",
+            sample=samples.index,
+            mate=[1, 2],
+            ext=["html", "zip"],
+        )
+
+
+include: "rules/fastqc.smk"
+include: "rules/bowtie.smk"
+include: "rules/samtools.smk"
diff --git a/workflow/envs/.DS_Store b/workflow/envs/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
Binary files /dev/null and b/workflow/envs/.DS_Store differ
diff --git a/workflow/envs/yourenv.yaml b/workflow/envs/yourenv.yaml
new file mode 100644
index 
0000000000000000000000000000000000000000..fb2f959cb9a0ddadaaac60f7241d2a7adaeae5cc
--- /dev/null
+++ b/workflow/envs/yourenv.yaml
@@ -0,0 +1,33 @@
+name: yourenv.yaml
+channels:
+  - bioconda
+  - defaults
+dependencies:
+  - bowtie2=2.3.5.1=py37h2dec4b4_0
+  - c-ares=1.18.1=hca72f7f_0
+  - ca-certificates=2022.2.1=hecd8cb5_0
+  - certifi=2021.10.8=py37hecd8cb5_2
+  - curl=7.80.0=hca72f7f_0
+  - krb5=1.19.2=hcd88c3b_0
+  - libcurl=7.80.0=h6dfd666_0
+  - libcxx=12.0.0=h2f01273_0
+  - libedit=3.1.20210910=hca72f7f_0
+  - libev=4.33=h9ed2024_1
+  - libffi=3.3=hb1e8313_2
+  - libnghttp2=1.46.0=ha29bfda_0
+  - libssh2=1.9.0=ha12b0ac_1
+  - ncurses=6.3=hca72f7f_2
+  - openssl=1.1.1m=hca72f7f_0
+  - perl=5.26.2=h4e221da_0
+  - pip=21.2.2=py37hecd8cb5_0
+  - python=3.7.11=h88f2d9e_0
+  - readline=8.1.2=hca72f7f_1
+  - samtools=1.4.1=0
+  - setuptools=58.0.4=py37hecd8cb5_0
+  - sqlite=3.37.2=h707629a_0
+  - tbb=2021.5.0=haf03e11_0
+  - tk=8.6.11=h7bc2e8c_0
+  - wheel=0.37.1=pyhd3eb1b0_0
+  - xz=5.2.5=h1de35cc_0
+  - zlib=1.2.11=h4dc903c_4
+prefix: /Users/charlotteseehagen/opt/anaconda3/envs/yourenv.yaml
diff --git a/workflow/rules/bowtie.smk b/workflow/rules/bowtie.smk
new file mode 100644
index 0000000000000000000000000000000000000000..b60c52ed2bf2fe3cab2504a0ed595d0d524158b1
--- /dev/null
+++ b/workflow/rules/bowtie.smk
@@ -0,0 +1,54 @@
+# Reference indexing and paired-end read mapping with Bowtie 2.
+
+# Files written by bowtie2-build for a small index. A large index uses
+# ".bt2l" suffixes instead and would need this tuple adjusted.
+BOWTIE2_INDEX_EXTS = (
+    ".1.bt2",
+    ".2.bt2",
+    ".3.bt2",
+    ".4.bt2",
+    ".rev.1.bt2",
+    ".rev.2.bt2",
+)
+
+
+# Build the Bowtie 2 index of the reference under the prefix config["index"].
+# The real index files are declared as output (instead of a file holding
+# bowtie2-build's stdout) so that a missing or incomplete index is rebuilt.
+rule refindex:
+    input:
+        "results/reference/reference.fa"
+    output:
+        multiext(config["index"], *BOWTIE2_INDEX_EXTS)
+    params:
+        prefix=config["index"]
+    log:
+        "workflow/report/refindex/bowtie2-build.log"
+    threads: 4
+    conda:
+        "../envs/yourenv.yaml"
+    shell:
+        "bowtie2-build --threads {threads} {input} {params.prefix} > {log} 2>&1"
+
+
+# Align the paired-end reads of one sample. The FASTQ paths are looked up in
+# the sample sheet columns "fq1"/"fq2"; SAM output is redirected from stdout
+# and stderr goes to the log.
+rule map:
+    input:
+        index=multiext(config["index"], *BOWTIE2_INDEX_EXTS),
+        r1=lambda wildcards: samples.at[wildcards.sample,'fq1'] if wildcards.sample in samples.index else ' ',
+        r2=lambda wildcards: samples.at[wildcards.sample,'fq2'] if wildcards.sample in samples.index else ' '
+    output:
+        "results/sam/{sample}.sam"
+    params:
+        prefix=config["index"],
+        bowtieparams=config["bowtieparams"]
+    log:
+        "workflow/report/mapping/{sample}.log"
+    threads: 4
+    conda:
+        "../envs/yourenv.yaml"
+    shell:
+        "bowtie2 {params.bowtieparams} -x {params.prefix} --threads={threads} "
+        "-1 {input.r1} -2 {input.r2} > {output} 2> {log}"
diff --git a/workflow/rules/fastqc.smk b/workflow/rules/fastqc.smk
new file mode 100644
index 0000000000000000000000000000000000000000..73f9daed2175b91ce4b69e7c8e57d61347021845
--- /dev/null
+++ b/workflow/rules/fastqc.smk
@@ -0,0 +1,29 @@
+# FastQC reports for the raw reads, one rule per mate. Each rule writes its
+# own log file so the two jobs of a sample cannot overwrite each other's log.
+
+rule rawfastqc1:
+    input:
+        rf1=lambda wildcards: samples.at[wildcards.sample,'fq1'] if wildcards.sample in samples.index else ' '
+    output:
+        html="results/fastqc/{sample}_1_fastqc.html",
+        zip="results/fastqc/{sample}_1_fastqc.zip"
+    log:
+        "workflow/report/fastqc/{sample}_1.log"
+    conda:
+        "../envs/yourenv.yaml"
+    wrapper:
+        "v1.3.1/bio/fastqc"
+
+
+rule rawfastqc2:
+    input:
+        rf2=lambda wildcards: samples.at[wildcards.sample,'fq2'] if wildcards.sample in samples.index else ' '
+    output:
+        html="results/fastqc/{sample}_2_fastqc.html",
+        zip="results/fastqc/{sample}_2_fastqc.zip"
+    log:
+        "workflow/report/fastqc/{sample}_2.log"
+    conda:
+        "../envs/yourenv.yaml"
+    wrapper:
+        "v1.3.1/bio/fastqc"
diff --git a/workflow/rules/samtools.smk b/workflow/rules/samtools.smk
new file mode 100644
index 0000000000000000000000000000000000000000..dc63db98607ed1018f97b8135d28edeb7c2fbeee
--- /dev/null
+++ b/workflow/rules/samtools.smk
@@ -0,0 +1,54 @@
+# SAM to BAM conversion, sorting, indexing and idxstats with samtools.
+
+rule convert:
+    input:
+        "results/sam/{sample}.sam"
+    output:
+        "results/bam/{sample}.bam"
+    threads: 4
+    conda:
+        "../envs/yourenv.yaml"
+    shell:
+        "samtools view -S -b --threads={threads} {input} > {output}"
+
+
+rule sort:
+    input:
+        "results/bam/{sample}.bam"
+    output:
+        "results/bam_sorted/{sample}.sorted.bam"
+    log:
+        "workflow/report/sort/{sample}.log"
+    threads: 4
+    conda:
+        "../envs/yourenv.yaml"
+    shell:
+        "samtools sort --threads={threads} {input} > {output} 2> {log}"
+
+
+# samtools index writes "<input>.bai" next to the sorted BAM; that file is the declared output.
+rule index:
+    input:
+        "results/bam_sorted/{sample}.sorted.bam"
+    output:
+        "results/bam_sorted/{sample}.sorted.bam.bai"
+    threads: 4
+    conda:
+        "../envs/yourenv.yaml"
+    shell:
+        "samtools index -@ {threads} {input}"
+
+
+# The index input must match the output of rule index
+# ("{sample}.sorted.bam.bai"); only the BAM is passed on the command line.
+rule stats:
+    input:
+        bam="results/bam_sorted/{sample}.sorted.bam",
+        bai="results/bam_sorted/{sample}.sorted.bam.bai"
+    output:
+        "results/stats/{sample}.stats.txt"
+    threads: 4
+    conda:
+        "../envs/yourenv.yaml"
+    shell:
+        "samtools idxstats --threads={threads} {input.bam} > {output}"
diff --git a/workflow/rules/trimming.smk b/workflow/rules/trimming.smk
new file mode 100644
index 0000000000000000000000000000000000000000..9ee44de1d9f7197834b4392ccbdbfe66257e2d7c
--- /dev/null
+++ b/workflow/rules/trimming.smk
@@ -0,0 +1,16 @@
+# Paired-end adapter/quality trimming with Trimmomatic.
+# Trimmomatic's PE mode takes two input files and four output files
+# (paired and unpaired reads for each mate), so all four are declared.
+rule trimming:
+    input:
+        rt1=lambda wildcards: samples.at[wildcards.sample,'fq1'] if wildcards.sample in samples.index else ' ',
+        rt2=lambda wildcards: samples.at[wildcards.sample,'fq2'] if wildcards.sample in samples.index else ' '
+    output:
+        r1="results/trimmed/{sample}_1.paired.fq.gz",
+        r1_unpaired="results/trimmed/{sample}_1.unpaired.fq.gz",
+        r2="results/trimmed/{sample}_2.paired.fq.gz",
+        r2_unpaired="results/trimmed/{sample}_2.unpaired.fq.gz"
+    shell:
+        "java -jar trimmomatic-0.39.jar PE {input.rt1} {input.rt2} "
+        "{output.r1} {output.r1_unpaired} {output.r2} {output.r2_unpaired} "
+        "ILLUMINACLIP:TruSeq3-PE.fa:2:30:10:2:True LEADING:3 TRAILING:3 MINLEN:36"