From 84a786d4a700c96a4ea6904a321dad14367de865 Mon Sep 17 00:00:00 2001
From: petkoa97 <petkoa97@mi.fu-berlin.de>
Date: Fri, 28 Apr 2023 15:48:52 +0200
Subject: [PATCH] fixed bug gffcompare

---
 .gitignore                               | 1 -
 config/config.yaml                       | 8 ++++----
 resources/.gitignore                     | 4 ++++
 run_snakemake.sh                         | 2 +-
 workflow/Snakefile                       | 3 +--
 workflow/rules/TAMA_single.smk           | 4 ++--
 workflow/rules/functional_annotation.smk | 8 ++++++--
 workflow/rules/gffcompare.smk            | 6 +++---
 8 files changed, 21 insertions(+), 15 deletions(-)
 create mode 100644 resources/.gitignore

diff --git a/.gitignore b/.gitignore
index 90c5555..bfe0cc1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,4 @@
 /.snakemake
 /logs/*
-/resources/*
 /results/*
 /tools/
diff --git a/config/config.yaml b/config/config.yaml
index 90b249f..a74cc1f 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -2,10 +2,10 @@ output_name_prefix:
   "Homo_sapiens_4"
 
 input_HiFi_reads:
-  "test_datasets/dataset3_sim_hChr19.fastq"
+  "dataset3_sim_hChr19.fastq"
 
 input_reference:
-  "test_datasets/Homo_sapiens_ch19.fasta"
+  "Homo_sapiens_ch19.fasta"
 
   
 
@@ -20,7 +20,7 @@ RUN_ISO_SEQ_COLLAPSE:   True
 RUN_GFF_COMPARISON:     True
 
 # Functional annotation (Database needs be configured)
-RUN_BLAST:              False
+RUN_BLAST:              True
 BLAST_DATABASE: "UniRefDB/uniref50.fasta"
 
 
@@ -29,7 +29,7 @@ BLAST_DATABASE: "UniRefDB/uniref50.fasta"
 
 # Split input for TAMA Collapse
 # It's recomended to split larger sam files (>500k reads) 
-TAMA_split_input: True
+TAMA_split_input: False
 TAMA_split_chunk_size_in_thousands: 500
 
 # Choose a preset or configure parameters for TAMA Collapse
diff --git a/resources/.gitignore b/resources/.gitignore
new file mode 100644
index 0000000..86d0cb2
--- /dev/null
+++ b/resources/.gitignore
@@ -0,0 +1,4 @@
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore
\ No newline at end of file
diff --git a/run_snakemake.sh b/run_snakemake.sh
index ce727c8..e3530b3 100644
--- a/run_snakemake.sh
+++ b/run_snakemake.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-#cd "$(dirname "$0")"
+cd "$(dirname "$0")"
 
 snakemake --cores 10 --use-conda
 
diff --git a/workflow/Snakefile b/workflow/Snakefile
index cb47979..53f0aac 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -12,7 +12,6 @@ include: "rules/gffcompare.smk"
 include: "rules/functional_annotation.smk"
 
 
-
 myoutput = []
 
 
@@ -20,7 +19,7 @@ if config["RUN_TAMA_COLLAPSE"] and config["TAMA_split_input"]:
     myoutput.append("results/structural_annotation/TAMA_collapse_merged_{output_name}/{output_name}.bed",)
 
 if config["RUN_TAMA_COLLAPSE"] and not config["TAMA_split_input"]:
-    myoutput.append("results/structural_annotation/TAMA_collapse_unmerged_{output_name}/{output_name}.bed")
+    myoutput.append("results/structural_annotation/TAMA_collapse_unmerged_{output_name}/Unmerged_{output_name}.bed")
 
 if config["RUN_STRINGTIE"]:
     myoutput.append("results/structural_annotation/{output_name}_stringtie_assembly.gtf")
diff --git a/workflow/rules/TAMA_single.smk b/workflow/rules/TAMA_single.smk
index cd19b8b..233b7b5 100644
--- a/workflow/rules/TAMA_single.smk
+++ b/workflow/rules/TAMA_single.smk
@@ -8,9 +8,9 @@ rule tama_collapse_single:
         reference     = input_reference
     output: 
         output_folder = directory("results/structural_annotation/TAMA_collapse_unmerged_{output_name}/"),
-        bed           = "results/structural_annotation/TAMA_collapse_unmerged_{output_name}/{output_name}.bed"
+        bed           = "results/structural_annotation/TAMA_collapse_unmerged_{output_name}/Unmerged_{output_name}.bed"
     params:
-        prefix =        "results/structural_annotation/TAMA_collapse_unmerged_{output_name}/{output_name}",
+        prefix =        "results/structural_annotation/TAMA_collapse_unmerged_{output_name}/Unmerged_{output_name}",
         param_string = get_TAMA_Collapse_param_string()
     log:
         "logs/{output_name}_tama_collapse.log"
diff --git a/workflow/rules/functional_annotation.smk b/workflow/rules/functional_annotation.smk
index 841b25c..038f08f 100644
--- a/workflow/rules/functional_annotation.smk
+++ b/workflow/rules/functional_annotation.smk
@@ -1,8 +1,12 @@
-
+def get_bed():
+    # Merged bed when TAMA input is split, else the unmerged one; plain truthiness matches the Snakefile checks.
+    if config["TAMA_split_input"]:
+        return "results/structural_annotation/TAMA_collapse_merged_{output_name}/{output_name}.bed"
+    return "results/structural_annotation/TAMA_collapse_unmerged_{output_name}/Unmerged_{output_name}.bed"
 
 rule bedtools_bed_to_fasta:
     input:
-        bed = "results/structural_annotation/TAMA_collapse_merged_{output_name}/{output_name}.bed",
+        bed = get_bed(),
         ref = input_reference
     output:
         fasta =  "results/functional_annotation/{output_name}/{output_name}.fasta"
diff --git a/workflow/rules/gffcompare.smk b/workflow/rules/gffcompare.smk
index bae396b..0a1d409 100644
--- a/workflow/rules/gffcompare.smk
+++ b/workflow/rules/gffcompare.smk
@@ -14,10 +14,10 @@ rule convert_bed_gtf:
 def get_output_files(wildcards):
     input_list=[]
     
-    if config["RUN_TAMA_COLLAPSE"] and config["TAMA_split_input"]:
-        input_list.append("results/structural_annotation/TAMA_collapse_merged_{output_name}/{output_name}.gtf") 
+    #if config["RUN_TAMA_COLLAPSE"] and config["TAMA_split_input"]:
+    #    input_list.append("results/structural_annotation/TAMA_collapse_merged_{output_name}/{output_name}.gtf") 
     if config["RUN_TAMA_COLLAPSE"] and not config["TAMA_split_input"]:
-        input_list.append("results/structural_annotation/TAMA_collapse_unmerged_{output_name}/{output_name}.gtf")
+        input_list.append("results/structural_annotation/TAMA_collapse_unmerged_{output_name}/Unmerged_{output_name}.gtf")
     if config["RUN_ISO_SEQ_COLLAPSE"]:
         input_list.extend(rules.iso_seq_collapse.output)
     if config["RUN_STRINGTIE"]:
-- 
GitLab