Patch notes (text placed before the first "diff --git" header is skipped by
`git apply` and `patch`).

What this patch does:
  * altFile: when "ALT_present" is not True, return the placeholder
    scripts/ALT_missing.fasta (added below as an empty file) instead of [].
  * saveConfiguration_and_getKeyValues: keep only the first line of the qv and
    completeness values written to {params.keyValues}.
  * aggregateAllAssemblies / aggregateReport / makePDF: write their outputs
    under 1_evaluation/finalResults/.
  * New rule addFullTable: render Combined_Results_FULLTABLE.tsv as a
    pipe-format Markdown table; aggregateReport appends it to the report.

Review changes made to the added ("+") lines, line counts unchanged:
  * addFullTable: delim_whitespace=True -> sep=r"\s+" (same parsing; the
    former is deprecated in pandas >= 2.2).
  * addFullTable: local DataFrame renamed samples -> fullTable so it no longer
    shares a name with the sample sheet table used by altFile.
  * aggregateReport: ">> {output}" -> "> {output}" so the report is always
    created fresh rather than appended to.

NOTE(review): this patch was rebuilt from a copy whose line breaks and
indentation had been collapsed. The 4-space indentation and the position of
blank context lines are a best-effort reconstruction. If a hunk does not
match, try `git apply --ignore-whitespace`, or regenerate the patch from the
working tree. The post-image blob id on the first "index" line predates the
review changes above.

diff --git a/rules/run_05_11_21.smk b/rules/run_05_11_21.smk
index aae4f1687d52a61c976983223063db6764d5fd18..c2c847181aa2124dbd736cf5d7db10a7aaef5c47 100644
--- a/rules/run_05_11_21.smk
+++ b/rules/run_05_11_21.smk
@@ -135,7 +135,7 @@ def altFile(wildcards):
     if samples.loc[(wildcards.asmID), "ALT_present"] == True:
         return os.path.join(config['Results'], "1_evaluation/{asmID}/01_unzipFastas/{asmID}.ALT.fasta")
     else:
-        return []
+        return os.path.join(workflow.basedir, "scripts/ALT_missing.fasta")
 
 rule merqury:
     input:
@@ -396,8 +396,8 @@ rule saveConfiguration_and_getKeyValues:
         echo "$(grep 'NG50' {input.contStats} | awk {{'print $4'}})" >> {params.keyValues}
         echo "$(grep 'N95' {input.contStats} | awk {{'print $2'}})" >> {params.keyValues}
         echo "$(grep 'NG95' {input.contStats} | awk {{'print $4'}})" >> {params.keyValues}
-        echo "$(awk {{'print $4'}} {input.qv})" >> {params.keyValues}
-        echo "$(awk {{'print $5'}} {input.completeness})" >> {params.keyValues}
+        echo "$(awk {{'print $4'}} {input.qv})" | head -n 1 >> {params.keyValues}
+        echo "$(awk {{'print $5'}} {input.completeness})" | head -n 1 >> {params.keyValues}
         echo "$(grep 'C:' {input.busco} | awk -F'[:\[,]' {{'print $2'}})" >> {params.keyValues}
         echo "$(grep 'C:' {input.busco} | awk -F'[:\[,]' {{'print $4'}})" >> {params.keyValues}
         dos2unix {params.keyValues}
@@ -503,10 +503,10 @@ rule aggregateAllAssemblies:
         sampleSheet= config['samplesTSV'],
         config=os.path.join(workflow.basedir, "configuration/config.yaml")
     output:
-        results=os.path.join(config['Results'],"1_evaluation/Combined_Results_FULLTABLE.tsv"),
+        results=os.path.join(config['Results'],"1_evaluation/finalResults/Combined_Results_FULLTABLE.tsv"),
 # rows=os.path.join(config['Results'],"allAssemblies_keyResults/aggregate_rows.tsv"),
-        newSampleSheet=os.path.join(config['Results'],"1_evaluation/savedSampleSheet.tsv"),
-        newConfigFile=os.path.join(config['Results'],"1_evaluation/savedConfig.yaml")
+        newSampleSheet=os.path.join(config['Results'],"1_evaluation/finalResults/savedSampleSheet.tsv"),
+        newConfigFile=os.path.join(config['Results'],"1_evaluation/finalResults/savedConfig.yaml")
     shell:
         """
         cp {input.sampleSheet} {output.newSampleSheet}
@@ -563,28 +563,50 @@ rule makeReport:
 # awk '{{sub("PLACEHOLDER","{input[0]}")}}1' {input.landingPage} > {output[0]}
 # """
 
+rule addFullTable:
+    input:
+        results=os.path.join(config['Results'],"1_evaluation/finalResults/Combined_Results_FULLTABLE.tsv")
+    output:
+        os.path.join(config['Results'],"1_evaluation/finalResults/FullTableMarkdown.md")
+    run:
+        import pandas as pd
+        from tabulate import tabulate
+        fullTable = pd.read_csv(input.results, dtype=str, index_col=0, sep=r"\s+", skip_blank_lines=True)
+        with open(output[0], "w") as out:
+            print("", file=out)
+            print("\\onecolumn", file=out)
+            print("", file=out)
+            print("\\tiny", file=out)
+            print(tabulate(fullTable, headers='keys',tablefmt="pipe", showindex=True), file=out)
+            print("", file=out)
 
 rule aggregateReport:
     input:
         indivMD=[expand(os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_k{kmer}_markdownForReport.md"), asmID=key, kmer=value4) for key, [value1, value2, value3, value4, value5, value6, value7, value8] in testDict.items()],
-        landingPage=os.path.join(workflow.basedir, "scripts/reportLandingPage.md")
+        landingPage=os.path.join(workflow.basedir, "scripts/reportLandingPage.md"),
+        endTableMD=os.path.join(config['Results'],"1_evaluation/finalResults/FullTableMarkdown.md")
     output:
-        os.path.join(config['Results'],"1_evaluation/FullMarkdown.md")
+        os.path.join(config['Results'],"1_evaluation/finalResults/FullMarkdown.md")
     shell:
         """
-        cat {input.landingPage} > {output}
-        cat {input.indivMD} >> {output}
+        cat {input.landingPage} {input.indivMD} {input.endTableMD} > {output}
         """
+
+
+
+
+
 
 rule makePDF:
     input:
-        os.path.join(config['Results'],"1_evaluation/FullMarkdown.md")
+        os.path.join(config['Results'],"1_evaluation/finalResults/FullMarkdown.md")
     output:
-        os.path.join(config['Results'],"1_evaluation/FULL_Report_PDF.pdf")
+        os.path.join(config['Results'],"1_evaluation/finalResults/FULL_Report_PDF.pdf")
     shell:
         """
         pandoc -o {output} {input}
         """
+
 # echo -e "Assembly\nsize_estimate\nmax_heterozygosity\nqv_score\nN50_length\nNG50_length\nN95_length\nNG95_length\nN100_length\nNG100_length\ntotal_num_bases\ntotal_num_scaffolds" > {output.rows}
 # paste -d'\t' {output.rows} {input.allResults} > {output.results}
 # awk '{print $1"\t"$3"\t"$5}' {output.results} | column -t {output.results}
diff --git a/scripts/ALT_missing.fasta b/scripts/ALT_missing.fasta
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391