diff --git a/final_project/README.md b/final_project/README.md
index f642db9a54e3f7ccd2dd8413363837b64f5f9bdf..fe28bdc4c0aa1aec76d8335ba0786f2935b4319a 100644
--- a/final_project/README.md
+++ b/final_project/README.md
@@ -1 +1,14 @@
-snakemake --config samples=input.tsv reference=/buffer/ag_bsc/PS_SEQAN_DATA/2023/DATA/REF/shigella_flexneri.fasta annotation=/buffer/ag_bsc/PS_SEQAN_DATA/2023/DATA/REF/shigella_flexneri.gff output_directory=/storage/mi/fisched99/output wgs=wgs.tsv --conda-prefix=/storage/mi/fisched99/condaCache  --profile config/local --rerun-incomplete
\ No newline at end of file
+### Run the workflow
+
+To run the workflow in reference based mode using a reference and annotation file use the following command:
+
+
+```
+snakemake --config samples=input.tsv reference=/buffer/ag_bsc/PS_SEQAN_DATA/2023/DATA/REF/shigella_flexneri.fasta annotation=/buffer/ag_bsc/PS_SEQAN_DATA/2023/DATA/REF/shigella_flexneri.gff output_directory=output  metadata=/buffer/ag_bsc/PS_SEQAN_DATA/2023/DATA/Lib2M/samples_sheet_2M.tsv --conda-prefix=/storage/mi/fisched99/condaCache  --profile config/local --rerun-incomplete
+```
+
+To run in denovo assembly mode add the path to the samplesheet (e.g.: wgs.tsv) for your WGS read data as an argument to the workflow as follows:
+
+```
+wgs=wgs.tsv
+```
\ No newline at end of file
diff --git a/final_project/workflow/Snakefile b/final_project/workflow/Snakefile
index a4558c7072715b94156fc9ce8b7849b4202103bf..97b906599fdd7bb38d18d55388bc2420b10b2535 100644
--- a/final_project/workflow/Snakefile
+++ b/final_project/workflow/Snakefile
@@ -27,33 +27,18 @@ include: "rules/phylogeny.smk"
 include: "rules/decontamination.smk"
 
 
-
-
-
-
 # define rule all
 rule all:
     input:
-        #expand(os.path.join(config['output_directory'], "02-ASSEMBLY", "samtools", "{sample}_consensus.fasta"), sample = list(samples.index)),
-        #expand(os.path.join(config['output_directory'], "03-ANNOTATION", "bedtools", "{sample}"), sample = list(samples.index)),
-        #os.path.join(config['output_directory'], "04-MSA", "gene_list.txt"),
-        #expand(os.path.join(config['output_directory'], "07-PHYLOGENY", "{gene}_msa.fasta"), gene = list(gene))
-        #checkpointbla
-        #os.path.join(config['output_directory'], "04-COUNTS", "featurecounts", "counts.tsv"),
-        #os.path.join(config['output_directory'], "00-Log", "phylogeny", "{gene}_mafft.log")
-        #os.path.join(config['output_directory'], "04-MSA", "gene_list.txt")
         os.path.join(config['output_directory'], "06-DGE", "dge_results.csv"),
-        os.path.join(config['output_directory'], "07-PHYLOGENY", "tree.nwk"),
-        os.path.join(config['output_directory'], "01-QC", "multiqc.html")
-
-        #os.path.join(config['output_directory'], "00-Log", "phylogeny", "concat_msa.log")
-        #checkcheck,
-        #os.path.join(config['output_directory'], "07-PHYLOGENY", "msa_all.fasta"),
-        #os.path.join(config['output_directory'], "04-MSA", "gene_list.txt")
-
+        os.path.join(config['output_directory'], "01-QC", "multiqc.html"),
+        os.path.join(config['output_directory'], "07-PHYLOGENY", "tree.png")
 
 
 onsuccess:
-    print(f"Workflow finished successfully.\nTryin' to hold tryin' to hold but there's nothing to grasp so I leet goooooo.\nGoodbye.")
+    # remove the intermediate gene files each containing one gene that we can't mark as temporary
+    # because they are output of the checkpoint and it wont run if we mark it as temp()
+    shutil.rmtree(os.path.join(config['output_directory'], "03-ANNOTATION", "bedtools"))
+    print(f"Workflow and cleanup finished successfully.\nTryin' to hold tryin' to hold but there's nothing to grasp so I leet goooooo.\nGoodbye.")
 
 
diff --git a/final_project/workflow/envs/draw_tree.yaml b/final_project/workflow/envs/draw_tree.yaml
index 96c6851cafefb5b243fea51d0c454ebf0c25a60b..ff42b3532f4e44b53694a0bb4421523a3a65c2a4 100644
--- a/final_project/workflow/envs/draw_tree.yaml
+++ b/final_project/workflow/envs/draw_tree.yaml
@@ -2,8 +2,7 @@ name: draw_tree
 channels:
   - bioconda
   - conda-forge
-  - anaconda
 dependencies:
+  - python>=3.7
   - biopython
-  - matplotlib
-  - argparse
\ No newline at end of file
+  - matplotlib
\ No newline at end of file
diff --git a/final_project/workflow/rules/annotation.smk b/final_project/workflow/rules/annotation.smk
index 92785d87407c4d13cc534729699088afe9ce4a90..1e79806728b5cc8e642d6e8b67b11e599095a225 100644
--- a/final_project/workflow/rules/annotation.smk
+++ b/final_project/workflow/rules/annotation.smk
@@ -4,6 +4,7 @@ def get_gff2bed_input(wildcards):
     else:
         return os.path.join(config['annotation'])
 
+
 rule gff2bed:
     input:
         get_gff2bed_input
@@ -44,6 +45,7 @@ checkpoint cut_genes:
         awk '/^>/{{OUT="{output.genes}/"substr($0,2)".fasta"; system("touch "OUT" 2>> {log}"); print ">"substr($0,2)"_sample_{wildcards.sample}" > OUT; next}}{{print > OUT}}' {output.genes_all} 2>> {log}
         """
 
+
 def get_prokka_input(wildcards):
     if config['polish']=="yes":
         return os.path.join(config['output_directory'], "02-ASSEMBLY", "polish", "assembly_polished.fasta")
@@ -68,12 +70,14 @@ rule prokka:
         prokka --outdir {output.outdir} --prefix annotation {input.assembly} --force --cpus {threads} &> {log}
         """
 
+
 def get_annotation(wildcards):
     if config['wgs']:
         return os.path.join(config['output_directory'], "03-ANNOTATION", "prokka", "annotation.gff")
     else:
         return config['annotation']
 
+
 rule gff2gtf:
     input:
         get_annotation
diff --git a/final_project/workflow/rules/assembly.smk b/final_project/workflow/rules/assembly.smk
index c6e3ac09252e18f35df22065b285970b084f212e..dfc784c3728c9f9f004b374f0bf71f3620c7054d 100644
--- a/final_project/workflow/rules/assembly.smk
+++ b/final_project/workflow/rules/assembly.smk
@@ -7,6 +7,7 @@ def get_hisat_ref(wildcards):
     else:
         return config['reference']
 
+
 rule hisat2_idx:
     input:
         reference = get_hisat_ref
@@ -48,6 +49,7 @@ rule hisat2_map:
         hisat2 -x {params.idx_prefix} -1 {input.r1} -2 {input.r2} -p {threads} 2> {log} | samtools view -bS 2> {log} | samtools sort -o {output} -T tmp --threads {threads} 2> {log}
         """
 
+
 rule samtools_consensus:
     input:
         sorted_bams = os.path.join(config['output_directory'], "02-ASSEMBLY", "hisat2_map", "{sample}_sorted.bam")
@@ -64,6 +66,7 @@ rule samtools_consensus:
         samtools consensus -f fasta -o {output.consensus} -d 5 {input.sorted_bams} 2> {log}
         """
 
+
 def get_spades_input(wildcards):
     if config["host_sequence"]:
         return { "r1" : expand(os.path.join(config["output_directory"], "01-QC", "screen", "samtools", "{sample}_decontaminated.1.fastq"), sample = list(wgs.index)),
@@ -95,6 +98,7 @@ rule spades:
         spades.py -o {output.outdir} -1 {output.reads_1} -2 {output.reads_2} -t {threads} --phred-offset 33 > {log}
         """
 
+
 rule polishing:
     input:
         assembly   = os.path.join(config['output_directory'], "02-ASSEMBLY", "spades", "contigs.fasta"),
diff --git a/final_project/workflow/rules/screen.smk b/final_project/workflow/rules/screen.smk
index 8358a5b789a02e550c9b47c473bef12cbf9c905b..adf389662b3749c801d21a7bbae36c5bf3237d41 100644
--- a/final_project/workflow/rules/screen.smk
+++ b/final_project/workflow/rules/screen.smk
@@ -17,6 +17,7 @@ rule kraken2:
         kraken2 --db {params.kraken2_db} --threads {threads} --paired --report {output} {input.r1} {input.r2} 2> {log} > /dev/null # we dont care about the actual output that is written to stdout (and is quite large)
         """
 
+
 def screen_input(wildcards):#otherwise wgs is not defined if we do not have wgs data
     if config['wgs']:
         return expand(os.path.join(config['output_directory'], "01-QC", "screen", "kraken2", "{sample}_report.txt"), sample = list(wgs.index))
@@ -24,6 +25,7 @@ def screen_input(wildcards):#otherwise wgs is not defined if we do not have wgs
         print("WARNING: Screening requested but not WGS reads were provided to screen. Exiting..")
         exit()
 
+
 rule screen:
     input:
         screen_input
diff --git a/final_project/workflow/scripts/DGE.R b/final_project/workflow/scripts/DGE.R
index 9f5a7ee5c5eae4434fc8ce5de7320b9c90ad83b2..dbb15fbd028505b18b7b1b7d5dd1ae14cb9a90d1 100644
--- a/final_project/workflow/scripts/DGE.R
+++ b/final_project/workflow/scripts/DGE.R
@@ -14,15 +14,10 @@ colorblind_friendly_pal <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E44
 
 ### read in feature counts output and with first line as header and column 'geneid' as rownames
 counts <- read.table(snakemake@input[[1]], header=TRUE, row.names = 1)
-#counts <- read.table('/home/david/Uni/appliedsequenceanalysis/final_project/output/05-COUNTS/featurecounts/counts.tsv', header=TRUE, row.names = 1)
-
-
 condition <- read.table(snakemake@input[[2]], header=TRUE, row.names = 1)
-#condition <- read.table('/home/david/Uni/appliedsequenceanalysis/Lib2M/samples_group_sheet.tsv', header=TRUE, row.names = 1)
 
 colnames(condition)[1] <- 'group'
 
-
 ### remove first 6 columns since they contain gene information but we only want count data now and convert to numeric
 counts <- counts[,6:ncol(counts)]
 counts[] <- sapply(counts, as.numeric)
@@ -55,8 +50,8 @@ dds <- DESeqDataSetFromMatrix(countData = counts,
         
 dds <- DESeq(dds)
 res <- results(dds)
-sorted_res <- res[order(res$padj, -res$log2FoldChange),]#sort rows for padj and then decreasing logfoldchange
-sorted_res <- sorted_res[1:20,!names(res) %in% c("stat", "baseMean", "pvalue","lfcSE")] #only take the first 20 rows and some columns
+sorted_res <- res[order(res$padj, -res$log2FoldChange),] # sort rows for padj and then decreasing logfoldchange
+sorted_res <- sorted_res[1:20,!names(res) %in% c("stat", "baseMean", "pvalue","lfcSE")] # only take the first 20 rows and some columns
 
 print(paste('Writing results to output file as', as.character(snakemake@output[['dge_results']]), '..'))
 write.csv(as.data.frame(sorted_res), snakemake@output[['dge_results']], row.names=TRUE)
@@ -64,13 +59,6 @@ write.csv(as.data.frame(sorted_res), snakemake@output[['dge_results']], row.name
 
 print(paste('Creating volcano plot as ', snakemake@output[['volcano_plot']], '..'))
 volcano_data <- data.frame(Log2FoldChange = res$log2FoldChange, Log10AdjustedPvalue = -log10(res$padj))
-#volcano_plot <- ggplot(volcano_data, aes(x=Log2FoldChange, y=-Log10AdjustedPvalue)) +
-#                       geom_point(aes(color=ifelse(abs(Log2FoldChange) >= 1 & res$padj < 0.05, "red", "black"))) +
-#                       labs(title="Volcano Plot",
-#                            x="log2(Fold Change)",
-#                            y="-log10(Adjusted p-value)") +
-#                       theme_minimal()
-#bin <- ggsave(plot=volcano_plot, filename=snakemake@output[['volcano_plot']], width=6, height=4, dpi=300)
 
 
 plot.volcano <-function(df, title_txt, subtitle_txt){
@@ -91,7 +79,6 @@ bin <- ggsave(plot=volcano_plot, filename=snakemake@output[['volcano_plot']], wi
 
 
 #Heatmap
-# Assuming 'samples' is a vector of sample names
 print(paste('Creating clustered heatmap as', snakemake@output[['heatmap']], '..'))
 de_genes <- rownames(subset(res, padj < 0.05))
 de_counts <- counts(dds)[de_genes, rownames(condition)]
diff --git a/final_project/workflow/scripts/draw_tree.py b/final_project/workflow/scripts/draw_tree.py
index e80ed40e93bc2e7791c8223029db8956de116439..966822650bb710637c9a8f253b928d6b55d813a6 100644
--- a/final_project/workflow/scripts/draw_tree.py
+++ b/final_project/workflow/scripts/draw_tree.py
@@ -1,12 +1,17 @@
 import matplotlib.pyplot as plt
 from Bio import Phylo
-import argparse
+import sys
 
-parser = argparse.ArgumentParser(description="Description of your program", epilog="Example usage: python3 draw_tree.py <TREE_NWK> <PLOT_PNG>" )
-parser.add_argument("TREE_NWK", help="Tree input file name. (newick formatted)")
-parser.add_argument("PLOT_PNG", help="Tree plot output file name.")
-args = parser.parse_args()
 
-Phylo.draw(Phylo.read(tree_nwk, "newick"), do_show = False)
-plt.title(f"Phylogenetic tree over all samples clustered by variance of genes above coverage threshold")
-plt.savefig(tree_plot)
\ No newline at end of file
+stdout = sys.stdout
+stderr = sys.stderr
+
+with open(snakemake.log[0], 'w') as sys.stdout:
+
+    sys.stderr = sys.stdout
+    Phylo.draw(Phylo.read(snakemake.input[0], "newick"), do_show = False)
+    plt.title(f"Phylogenetic tree over all samples clustered by variance of genes")
+    plt.savefig(snakemake.output[0])
+
+sys.stdout = stdout
+sys.stderr = stderr
\ No newline at end of file