From 84a786d4a700c96a4ea6904a321dad14367de865 Mon Sep 17 00:00:00 2001 From: petkoa97 <petkoa97@mi.fu-berlin.de> Date: Fri, 28 Apr 2023 15:48:52 +0200 Subject: [PATCH] fixed bug gffcompare --- .gitignore | 1 - config/config.yaml | 8 ++++---- resources/.gitignore | 4 ++++ run_snakemake.sh | 2 +- workflow/Snakefile | 3 +-- workflow/rules/TAMA_single.smk | 4 ++-- workflow/rules/functional_annotation.smk | 8 ++++++-- workflow/rules/gffcompare.smk | 6 +++--- 8 files changed, 21 insertions(+), 15 deletions(-) create mode 100644 resources/.gitignore diff --git a/.gitignore b/.gitignore index 90c5555..bfe0cc1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ /.snakemake /logs/* -/resources/* /results/* /tools/ diff --git a/config/config.yaml b/config/config.yaml index 90b249f..a74cc1f 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -2,10 +2,10 @@ output_name_prefix: "Homo_sapiens_4" input_HiFi_reads: - "test_datasets/dataset3_sim_hChr19.fastq" + "dataset3_sim_hChr19.fastq" input_reference: - "test_datasets/Homo_sapiens_ch19.fasta" + "Homo_sapiens_ch19.fasta" @@ -20,7 +20,7 @@ RUN_ISO_SEQ_COLLAPSE: True RUN_GFF_COMPARISON: True # Functional annotation (Database needs be configured) -RUN_BLAST: False +RUN_BLAST: True BLAST_DATABASE: "UniRefDB/uniref50.fasta" @@ -29,7 +29,7 @@ BLAST_DATABASE: "UniRefDB/uniref50.fasta" # Split input for TAMA Collapse # It's recomended to split larger sam files (>500k reads) -TAMA_split_input: True +TAMA_split_input: False TAMA_split_chunk_size_in_thousands: 500 # Choose a preset or configure parameters for TAMA Collapse diff --git a/resources/.gitignore b/resources/.gitignore new file mode 100644 index 0000000..86d0cb2 --- /dev/null +++ b/resources/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore \ No newline at end of file diff --git a/run_snakemake.sh b/run_snakemake.sh index ce727c8..e3530b3 100644 --- a/run_snakemake.sh +++ b/run_snakemake.sh @@ -1,5 +1,5 @@ #!/bin/bash -#cd "$(dirname "$0")" +cd "$(dirname "$0")" snakemake --cores 10 --use-conda diff --git a/workflow/Snakefile b/workflow/Snakefile index cb47979..53f0aac 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -12,7 +12,6 @@ include: "rules/gffcompare.smk" include: "rules/functional_annotation.smk" - myoutput = [] @@ -20,7 +19,7 @@ if config["RUN_TAMA_COLLAPSE"] and config["TAMA_split_input"]: myoutput.append("results/structural_annotation/TAMA_collapse_merged_{output_name}/{output_name}.bed",) if config["RUN_TAMA_COLLAPSE"] and not config["TAMA_split_input"]: - myoutput.append("results/structural_annotation/TAMA_collapse_unmerged_{output_name}/{output_name}.bed") + myoutput.append("results/structural_annotation/TAMA_collapse_unmerged_{output_name}/Unmerged_{output_name}.bed") if config["RUN_STRINGTIE"]: myoutput.append("results/structural_annotation/{output_name}_stringtie_assembly.gtf") diff --git a/workflow/rules/TAMA_single.smk b/workflow/rules/TAMA_single.smk index cd19b8b..233b7b5 100644 --- a/workflow/rules/TAMA_single.smk +++ b/workflow/rules/TAMA_single.smk @@ -8,9 +8,9 @@ rule tama_collapse_single: reference = input_reference output: output_folder = directory("results/structural_annotation/TAMA_collapse_unmerged_{output_name}/"), - bed = "results/structural_annotation/TAMA_collapse_unmerged_{output_name}/{output_name}.bed" + bed = "results/structural_annotation/TAMA_collapse_unmerged_{output_name}/Unmerged_{output_name}.bed" params: - prefix = "results/structural_annotation/TAMA_collapse_unmerged_{output_name}/{output_name}", + prefix = "results/structural_annotation/TAMA_collapse_unmerged_{output_name}/Unmerged_{output_name}", param_string = get_TAMA_Collapse_param_string() log: "logs/{output_name}_tama_collapse.log" diff --git a/workflow/rules/functional_annotation.smk b/workflow/rules/functional_annotation.smk index 841b25c..038f08f 100644 --- a/workflow/rules/functional_annotation.smk +++ b/workflow/rules/functional_annotation.smk @@ -1,8 +1,12 @@ - +def get_bed(): + if config["TAMA_split_input"] == True: + return "results/structural_annotation/TAMA_collapse_merged_{output_name}/{output_name}.bed" + else: + return "results/structural_annotation/TAMA_collapse_unmerged_{output_name}/Unmerged_{output_name}.bed" rule bedtools_bed_to_fasta: input: - bed = "results/structural_annotation/TAMA_collapse_merged_{output_name}/{output_name}.bed", + bed = get_bed(), ref = input_reference output: fasta = "results/functional_annotation/{output_name}/{output_name}.fasta" diff --git a/workflow/rules/gffcompare.smk b/workflow/rules/gffcompare.smk index bae396b..0a1d409 100644 --- a/workflow/rules/gffcompare.smk +++ b/workflow/rules/gffcompare.smk @@ -14,10 +14,10 @@ rule convert_bed_gtf: def get_output_files(wildcards): input_list=[] - if config["RUN_TAMA_COLLAPSE"] and config["TAMA_split_input"]: - input_list.append("results/structural_annotation/TAMA_collapse_merged_{output_name}/{output_name}.gtf") + #if config["RUN_TAMA_COLLAPSE"] and config["TAMA_split_input"]: + # input_list.append("results/structural_annotation/TAMA_collapse_merged_{output_name}/{output_name}.gtf") if config["RUN_TAMA_COLLAPSE"] and not config["TAMA_split_input"]: - input_list.append("results/structural_annotation/TAMA_collapse_unmerged_{output_name}/{output_name}.gtf") + input_list.append("results/structural_annotation/TAMA_collapse_unmerged_{output_name}/Unmerged_{output_name}.gtf") if config["RUN_ISO_SEQ_COLLAPSE"]: input_list.extend(rules.iso_seq_collapse.output) if config["RUN_STRINGTIE"]: -- GitLab