diff --git a/rules/evaluate.smk b/rules/evaluate.smk index 196ea7bb390f96288799b879b153bd2783d34a27..ae3c27936ca4a36c0bcc7c64dbb9862d79b9b2cd 100644 --- a/rules/evaluate.smk +++ b/rules/evaluate.smk @@ -125,7 +125,38 @@ rule indexFasta_asm1: (samtools faidx {input.assembly_asm1}) &>> {log} """ -rule convertFastqTObam_R1_HiC: +def asm2File(wildcards): + if samples.loc[(wildcards.asmID), "ALT_present"] == True: + return os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta") + else: + return os.path.join(workflow.basedir, "scripts/standIN_files/ALT_missing.fasta") + +rule indexFasta_asm2: + input: + assembly_asm2=asm2File, + output: + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.0123"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.amb"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.ann"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.bwt.2bit.64"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.pac"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.fai") + log: + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/indexASM.asm2.{asmID}.log") + conda: + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") + threads: + resource['indexFasta_PRI']['threads'] + resources: + mem_mb=resource['indexFasta_PRI']['mem_mb'], + time=resource['indexFasta_PRI']['time'] + shell: + """ + (bwa-mem2 index {input.assembly_asm2}) &> {log} + (samtools faidx {input.assembly_asm2}) &>> {log} + """ + +rule convertFastqTObam_R1_HiC_asm1: input: HiC_R1=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R1.fastq"), assembly=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta"), @@ -136,7 +167,7 @@ rule convertFastqTObam_R1_HiC: os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.pac"), os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.fai") ] output: - temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R1.bam")) + temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm1.HiC.R1.bam")) conda: os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") threads: @@ -145,13 +176,39 @@ rule convertFastqTObam_R1_HiC: mem_mb=resource['convertFastqTObam_R1_HiC']['mem_mb'], time=resource['convertFastqTObam_R1_HiC']['time'] log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/fastq2bam.HiC.R1.{asmID}.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/fastq2bam.asm1.HiC.R1.{asmID}.log") shell: """ (bwa-mem2 mem -t {threads} -B8 {input.assembly} {input.HiC_R1} | samtools view -Sb - > {output}) &> {log} """ -rule convertFastqTObam_R2_HiC: +rule convertFastqTObam_R1_HiC_asm2: + input: + HiC_R1=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R1.fastq"), + assembly=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta"), + indexes= [ os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.0123"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.amb"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.ann"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.bwt.2bit.64"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.pac"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.fai") ] + output: + temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm2.HiC.R1.bam")) + conda: + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") + threads: + resource['convertFastqTObam_R1_HiC']['threads'] + resources: + mem_mb=resource['convertFastqTObam_R1_HiC']['mem_mb'], + time=resource['convertFastqTObam_R1_HiC']['time'] + log: + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/fastq2bam.asm2.HiC.R1.{asmID}.log") + shell: + """ + (bwa-mem2 mem -t {threads} -B8 {input.assembly} {input.HiC_R1} | samtools view -Sb - > {output}) &> {log} + """ + +rule convertFastqTObam_R2_HiC_asm1: input: HiC_R2=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R2.fastq"), assembly=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta"), @@ -162,7 +219,7 @@ rule convertFastqTObam_R2_HiC: os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.pac"), os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.fai") ] output: - temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R2.bam")) + temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm1.HiC.R2.bam")) conda: os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") threads: @@ -171,24 +228,44 @@ rule convertFastqTObam_R2_HiC: mem_mb=resource['convertFastqTObam_R2_HiC']['mem_mb'], time=resource['convertFastqTObam_R2_HiC']['time'] log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/fastq2bam.HiC.R2.{asmID}.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/fastq2bam.asm1.HiC.R2.{asmID}.log") shell: """ (bwa-mem2 mem -t {threads} -B8 {input.assembly} {input.HiC_R2} | samtools view -Sb - > {output}) &> {log} """ -rule filter5PrimeEnd_R1_HiC: +rule convertFastqTObam_R2_HiC_asm2: input: - HiC_R1_bam=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R1.bam"), - assembly=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta"), - indexes= [ os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.0123"), - os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.amb"), - os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.ann"), - os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.bwt.2bit.64"), - os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.pac"), - os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.fai") ] + HiC_R2=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R2.fastq"), + assembly=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta"), + indexes= [ os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.0123"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.amb"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.ann"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.bwt.2bit.64"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.pac"), + os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta.fai") ] + output: + temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm2.HiC.R2.bam")) + conda: + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") + threads: + resource['convertFastqTObam_R2_HiC']['threads'] + resources: + mem_mb=resource['convertFastqTObam_R2_HiC']['mem_mb'], + time=resource['convertFastqTObam_R2_HiC']['time'] + log: + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/fastq2bam.asm2.HiC.R2.{asmID}.log") + shell: + """ + (bwa-mem2 mem -t {threads} -B8 {input.assembly} {input.HiC_R2} | samtools view -Sb - > {output}) &> {log} + """ + +rule filter5PrimeEnd_R1_HiC_asm1: + input: + HiC_R1_bam=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm1.HiC.R1.bam"), + assembly=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta") output: - temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R1.FILTERED.bam")) + temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm1.HiC.R1.FILTERED.bam")) params: script=os.path.join(workflow.basedir, "scripts/process_HiC/filter_five_end.pl") conda: @@ -199,24 +276,41 @@ rule filter5PrimeEnd_R1_HiC: mem_mb=resource['filter5PrimeEnd_R1_HiC']['mem_mb'], time=resource['filter5PrimeEnd_R1_HiC']['time'] log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/filtered.HiC.R1.{asmID}.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/filtered.asm1.HiC.R1.{asmID}.log") shell: """ (samtools view -h {input.HiC_R1_bam}| perl {params.script} | samtools view -@{threads} -Sb - > {output}) &> {log} """ -rule filter5PrimeEnd_R2_HiC: + +rule filter5PrimeEnd_R1_HiC_asm2: input: - HiC_R2_bam=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R2.bam"), - assembly=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta"), - indexes= [ os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.0123"), - os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.amb"), - os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.ann"), - os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.bwt.2bit.64"), - os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.pac"), - os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta.fai") ] + HiC_R1_bam=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm2.HiC.R1.bam"), + assembly=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta") output: - temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R2.FILTERED.bam")) + temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm2.HiC.R1.FILTERED.bam")) + params: + script=os.path.join(workflow.basedir, "scripts/process_HiC/filter_five_end.pl") + conda: + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") + threads: + resource['filter5PrimeEnd_R1_HiC']['threads'] + resources: + mem_mb=resource['filter5PrimeEnd_R1_HiC']['mem_mb'], + time=resource['filter5PrimeEnd_R1_HiC']['time'] + log: + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/filtered.asm2.HiC.R1.{asmID}.log") + shell: + """ + (samtools view -h {input.HiC_R1_bam}| perl {params.script} | samtools view -@{threads} -Sb - > {output}) &> {log} + """ + +rule filter5PrimeEnd_R2_HiC_asm1: + input: + HiC_R2_bam=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm1.HiC.R2.bam"), + assembly=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm1.fasta") + output: + temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm1.HiC.R2.FILTERED.bam")) params: script=os.path.join(workflow.basedir, "scripts/process_HiC/filter_five_end.pl") conda: @@ -227,19 +321,64 @@ rule filter5PrimeEnd_R2_HiC: mem_mb=resource['filter5PrimeEnd_R2_HiC']['mem_mb'], time=resource['filter5PrimeEnd_R2_HiC']['time'] log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/filtered.HiC.R1.{asmID}.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/filtered.asm1.HiC.R1.{asmID}.log") shell: """ (samtools view -h {input.HiC_R2_bam}| perl {params.script} | samtools view -@{threads} -Sb - > {output}) &> {log} """ -rule pairAndCombineFiltered_HiC: +rule filter5PrimeEnd_R2_HiC_asm2: + input: + HiC_R2_bam=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm2.HiC.R2.bam"), + assembly=os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta") + output: + temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm2.HiC.R2.FILTERED.bam")) + params: + script=os.path.join(workflow.basedir, "scripts/process_HiC/filter_five_end.pl") + conda: + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") + threads: + resource['filter5PrimeEnd_R2_HiC']['threads'] + resources: + mem_mb=resource['filter5PrimeEnd_R2_HiC']['mem_mb'], + time=resource['filter5PrimeEnd_R2_HiC']['time'] + log: + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/filtered.asm2.HiC.R1.{asmID}.log") + shell: + """ + (samtools view -h {input.HiC_R2_bam}| perl {params.script} | samtools view -@{threads} -Sb - > {output}) &> {log} + """ + + +rule pairAndCombineFiltered_HiC_asm1: + input: + R1=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm1.HiC.R1.FILTERED.bam"), + R2=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm1.HiC.R2.FILTERED.bam") + output: + temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm1.HiC.COMBINED.FILTERED.bam")) + params: + script=os.path.join(workflow.basedir, "scripts/process_HiC/two_read_bam_combiner.pl") + conda: + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") + threads: + resource['pairAndCombineFiltered_HiC']['threads'] + resources: + mem_mb=resource['pairAndCombineFiltered_HiC']['mem_mb'], + time=resource['pairAndCombineFiltered_HiC']['time'] + log: + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/combine.asm1.filtered.HiC.{asmID}.log") + shell: + """ + (perl {params.script} {input.R1} {input.R2} | samtools view -@{threads} -Sb > {output}) &>{log} + """ + +rule pairAndCombineFiltered_HiC_asm2: input: - R1=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R1.FILTERED.bam"), - R2=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.R2.FILTERED.bam") + R1=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm2.HiC.R1.FILTERED.bam"), + R2=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm2.HiC.R2.FILTERED.bam") output: - temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.COMBINED.FILTERED.bam")) + temp(os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm2.HiC.COMBINED.FILTERED.bam")) params: script=os.path.join(workflow.basedir, "scripts/process_HiC/two_read_bam_combiner.pl") conda: @@ -250,17 +389,36 @@ rule pairAndCombineFiltered_HiC: mem_mb=resource['pairAndCombineFiltered_HiC']['mem_mb'], time=resource['pairAndCombineFiltered_HiC']['time'] log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/combine.filtered.HiC.{asmID}.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/combine.asm2.filtered.HiC.{asmID}.log") shell: """ (perl {params.script} {input.R1} {input.R2} | samtools view -@{threads} -Sb > {output}) &>{log} """ -rule pretextMap: +rule pretextMap_asm1: + input: + HiC_alignment=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm1.HiC.COMBINED.FILTERED.bam") + output: + pretextFile=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm1.HiC.COMBINED.FILTERED.pretext") + conda: + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") + threads: + resource['pretextMap']['threads'] + resources: + mem_mb=resource['pretextMap']['mem_mb'], + time=resource['pretextMap']['time'] + log: + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/PretextMap.{asmID}.asm1.log") + shell: + """ + (samtools view -h {input.HiC_alignment} | PretextMap -o {output.pretextFile} --sortby length --mapq 10) &> {log} + """ + +rule pretextMap_asm2: input: - HiC_alignment=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.COMBINED.FILTERED.bam") + HiC_alignment=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm2.HiC.COMBINED.FILTERED.bam") output: - pretextFile=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.COMBINED.FILTERED.pretext") + pretextFile=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm2.HiC.COMBINED.FILTERED.pretext") conda: os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") threads: @@ -269,17 +427,18 @@ rule pretextMap: mem_mb=resource['pretextMap']['mem_mb'], time=resource['pretextMap']['time'] log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/PretextMap.{asmID}.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/PretextMap.{asmID}.asm2.log") shell: """ (samtools view -h {input.HiC_alignment} | PretextMap -o {output.pretextFile} --sortby length --mapq 10) &> {log} """ -rule pretextSnapshot: + +rule pretextSnapshot_asm1: input: - pretextFile=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.COMBINED.FILTERED.pretext") + pretextFile=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm1.HiC.COMBINED.FILTERED.pretext") output: - pretextSnapshotFULL=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.COMBINED.FILTERED_FullMap.png"), + pretextSnapshotFULL=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm1.HiC.COMBINED.FILTERED_FullMap.png"), params: outDirectory=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/") conda: @@ -290,13 +449,34 @@ rule pretextSnapshot: mem_mb=resource['pretextSnapshot']['mem_mb'], time=resource['pretextSnapshot']['time'] log: - os.path.join(config['Results'], "1_evaluation/{asmID}/logs/PretextSnapshot.{asmID}.log") + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/PretextSnapshot.{asmID}.asm1.log") shell: """ (PretextSnapshot -m {input.pretextFile} -o {params.outDirectory}) &> {log} """ +rule pretextSnapshot_asm2: + input: + pretextFile=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm2.HiC.COMBINED.FILTERED.pretext") + output: + pretextSnapshotFULL=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm2.HiC.COMBINED.FILTERED_FullMap.png"), + params: + outDirectory=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/") + conda: + os.path.join(workflow.basedir, "envs/HiC_CONTACT_MAPS.yaml") + threads: + resource['pretextSnapshot']['threads'] + resources: + mem_mb=resource['pretextSnapshot']['mem_mb'], + time=resource['pretextSnapshot']['time'] + log: + os.path.join(config['Results'], "1_evaluation/{asmID}/logs/PretextSnapshot.{asmID}.asm2.log") + shell: + """ + (PretextSnapshot -m {input.pretextFile} -o {params.outDirectory}) &> {log} + """ + ####################################################################################################################################### def asm1_gzipped(wildcards): @@ -381,11 +561,6 @@ rule symlink_UnzippedFasta_asm2: #################### -def asm2File(wildcards): - if samples.loc[(wildcards.asmID), "ALT_present"] == True: - return os.path.join(config['Results'], "1_evaluation/{asmID}/ASSEMBLY_FASTAS/{asmID}.asm2.fasta") - else: - return os.path.join(workflow.basedir, "scripts/standIN_files/ALT_missing.fasta") def merylDB(wildcards): return samples.loc[(wildcards.asmID), "merylDB"] @@ -903,12 +1078,15 @@ rule IndividualResults_md: rule PretextMaps_md: input: - PretextMap=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.HiC.COMBINED.FILTERED_FullMap.png") + PretextMap_asm1=os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm1.HiC.COMBINED.FILTERED_FullMap.png"), + PretextMap_asm2=lambda wildcards: os.path.join(config['Results'], "1_evaluation/{asmID}/HiC_MAPS/{asmID}.asm2.HiC.COMBINED.FILTERED_FullMap.png") if samples.loc[wildcards.asmID, "ALT_present"] else "scripts/standIN_files/ALT_missing.fasta" output: - pretextCP2keyResults=os.path.join(config['Results'], "1_evaluation/{asmID}/KEY_RESULTS/{asmID}.pretext_hiC_FullMap.png"), + pretextCP2keyResults_asm1=os.path.join(config['Results'], "1_evaluation/{asmID}/KEY_RESULTS/{asmID}.asm1.pretext_hiC_FullMap.png"), IndividualPretextMD=os.path.join(config['Results'],"1_evaluation/{asmID}/KEY_RESULTS/{asmID}_PRETEXT.md") params: - assemblyName='{asmID}' + assemblyName='{asmID}', + pretextCP2keyResults_asm2=os.path.join(config['Results'], "1_evaluation/{asmID}/KEY_RESULTS/{asmID}.asm2.pretext_hiC_FullMap.png"), + asm2provided=lambda wildcards: "true" if samples.loc[(wildcards.asmID), "ALT_present"] else False conda: os.path.join(workflow.basedir, "envs/AUXILIARY_PYTHON_SCRIPTS.yaml") script: diff --git a/scripts/assembly_stats/stats.py b/scripts/assembly_stats/stats.py deleted file mode 100644 index 497b6e80794c2edebac968292b563397e779de37..0000000000000000000000000000000000000000 --- a/scripts/assembly_stats/stats.py +++ /dev/null @@ -1,368 +0,0 @@ -import numpy as np -from itertools import groupby -import json -import sys -import csv -import os -import pandas as pd -from tabulate import tabulate - -#pd.set_option('display.float_format', lambda x: '%,g' % x) -with open(sys.argv[2]) as f: - ff=f.readlines() - -# estSizeFile = open() -# -# size_bp = estSizeFile.readlines() - -estSize=int(float(sys.argv[4])) - -if estSize == 0: - size_bp = ff[0][:-1] - print("No estimated Genome Size provided, using estimation from Genomescope2: ", size_bp) -else: - print("Estimated Genome Size provided as:", estSize, " using this size for NG and LG stats") - size_bp = estSize - -def fasta_iter(fasta_file): - """ - Takes a FASTA file, and produces a generator of Header and Sequences. - This is a memory-efficient way of analyzing a FASTA files -- without - reading the entire file into memory. - - Parameters - ---------- - fasta_file : str - The file location of the FASTA file - - Returns - ------- - header: str - The string contained in the header portion of the sequence record - (everything after the '>') - seq: str - The sequence portion of the sequence record - """ - - fh = open(fasta_file) - fa_iter = (x[1] for x in groupby(fh, lambda line: line[0] == ">")) - for header in fa_iter: - # drop the ">" - header = next(header)[1:].strip() - # join all sequence lines to one. - seq = "".join(s.upper().strip() for s in next(fa_iter)) - yield header, seq - - -def read_genome(fasta_file): - """ - Takes a FASTA file, and produces 2 lists of sequence lengths. It also - calculates the GC Content, since this is the only statistic that is not - calculated based on sequence lengths. - - Parameters - ---------- - fasta_file : str - The file location of the FASTA file - - Returns - ------ - contig_lens: list - A list of lengths of all contigs in the genome. - scaffold_lens: list - A list of lengths of all scaffolds in the genome. - gc_cont: float - The percentage of total basepairs in the genome that are either G or C. - """ - - gc = 0 - total_len = 0 - contig_lens = [] - scaffold_lens = [] - for _, seq in fasta_iter(fasta_file): - scaffold_lens.append(len(seq)) - if "NN" in seq: - contig_list = seq.split("NN") - else: - contig_list = [seq] - for contig in contig_list: - if len(contig): - gc += contig.count('G') + contig.count('C') - total_len += len(contig) - contig_lens.append(len(contig)) - gc_cont = (gc / total_len) * 100 -# print("this is gc_content", gc_cont) - return contig_lens, scaffold_lens, gc_cont - - -def calculate_stats(seq_lens, gc_cont): - naming = sys.argv[3] - stats = {} - seq_array = np.array(seq_lens) -# stats['Assembly:']=naming - stats['sequence_count'] = seq_array.size - testsize=stats['sequence_count'] - stats['number_of_gaps'] = 0 -# print("this is the count",naming," ", testsize) - stats['gc_content (%)'] = gc_cont - sorted_lens = seq_array[np.argsort(-seq_array)] - stats['longest (bp)'] = int(sorted_lens[0]) - testlongest= stats['longest (bp)'] -# print("this is the longest", naming," ",testlongest) - stats['shortest (bp)'] = int(sorted_lens[-1]) -# stats['median'] = np.median(sorted_lens) -# stats['mean'] = np.mean(sorted_lens) - stats['total_bps (bp)'] = int(np.sum(sorted_lens)) - testprint=stats['total_bps (bp)'] -# print("total_bp is", naming," ",testprint) - stats['estimated_size (bp)'] = int(size_bp) - csum = np.cumsum(sorted_lens) - # if stats['total_bps (bp)'] < stats['estimated_size (bp)']: - # csum_ng = np.append(csum, stats['estimated_size (bp)']) - # else: - # csum_ng=csum - for level in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 96, 97, 98, 99, 100]: - nx = int(stats['total_bps (bp)'] * (level / 100)) - csumn = min(csum[csum >= nx]) - l_level = int(np.where(csum == csumn)[0]) + 1 - stats['L' + str(level)] = l_level - for level in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 96, 97, 98, 99, 100]: -# print("the totalbps are:", stats['total_bps (bp)']) - nx = int(stats['total_bps (bp)'] * (level / 100)) -# print("this is the nx", nx) -# print("this is the csum", csum) - csumn = min(csum[csum >= nx]) -# print("this is the csumn", csumn) - l_level = int(np.where(csum == csumn)[0]) - n_level = int(sorted_lens[l_level]) - stats['N' + str(level)] = n_level -# print(level, " ", n_level) - for level in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 96, 97, 98, 99, 100]: -# print("the estbps are:", stats['estimated_size (bp)']) - ngx = int(stats['estimated_size (bp)'] * (level / 100)) -# print("this is the ngx", ngx) -# print("this is the csum", csum_ng) -# print("this is the csum", csum) -# print("this is the [csum >= ngx]", np.array(csum >= ngx)) - new_array=np.array(csum >= ngx) - # print(np.any(new_array)) - if np.any(new_array) == False: - csumng = csum[seq_array.size-1] - # print("this is the csumng", csumng) - lg_level = int(np.where(csum == csumng)[0]) + 1 - stats['LG' + str(level)] = lg_level - elif np.any(new_array) == True: - csumng = min(csum[csum >= ngx]) - # print("this is the csumng", csumng) - lg_level = int(np.where(csum == csumng)[0]) + 1 - stats['LG' + str(level)] = lg_level - for level in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 96, 97, 98, 99, 100]: - ngx = int(stats['estimated_size (bp)'] * (level / 100)) - new_array=np.array(csum >= ngx) - # print(np.any(new_array)) - if np.any(new_array) == False: - csumng = csum[seq_array.size-1] - # print("this is the csumng", csumng) - lg_level = int(np.where(csum == csumng)[0]) - ng_level = int(sorted_lens[lg_level]) - stats['NG' + str(level)] = ng_level - elif np.any(new_array) == True: - csumng = min(csum[csum >= ngx]) - # print("this is the csumng", csumng) - lg_level = int(np.where(csum == csumng)[0]) - ng_level = int(sorted_lens[lg_level]) - stats['NG' + str(level)] = ng_level - return stats - - -if __name__ == "__main__": -# print(size_bp) -# print(type(size_bp)) - naming = sys.argv[3] - infilename = sys.argv[1] - contig_lens, scaffold_lens, gc_cont = read_genome(infilename) -# contig_stats = calculate_stats(contig_lens, gc_cont) - scaffold_stats = calculate_stats(scaffold_lens, gc_cont) - rounded_gc = round(gc_cont, 2) -# print("this is the rounded_gc: ", rounded_gc) - contig_stats = calculate_stats(contig_lens, gc_cont) - gaps=contig_stats.get('sequence_count') - scaffold_stats.get('sequence_count') - scaffold_stats['number_of_gaps'] = gaps - contig_stats['number_of_gaps'] = gaps -# print("this is the scaffold_stats: ", scaffold_stats) -# print(scaffold_stats) -# df_scaffold_all= pd.DataFrame.from_dict(scaffold_stats, orient= 'index') -# print(df_scaffold_all) -# df2 = pd.DataFrame([scaffold_stats]).T -# print(df2) -# s = pd.Series(scaffold_stats, name=naming) -# s.index.name = 'Assembly:' -# s.reset_index() -# print(s) - scaff_seq=pd.DataFrame(scaffold_stats.items(), columns=['Assembly:', naming]) -# print("this is the scaff_seq: ", scaff_seq) - df_scaffold_top=scaff_seq.iloc[0:7,0:2] -# print("this is GC CONTENT", gc_cont) - df_scaffold_top[naming]=df_scaffold_top[naming].astype('int64').apply('{:,}'.format) -# print("this is the top: ", df_scaffold_top) - df_scaffold_top[naming][2] = rounded_gc -# print("this is df_scaffold_top[naming][2]", df_scaffold_top[naming][2]) -# print("this is the top: ", df_scaffold_top) - # df_scaffold_top=df_scaffold_top.style.hide_index() -# df_scaffold_top[naming].round(decimals=0) - # df_contig_all=pd.DataFrame(data=contig_stats) - # df_contig_top=df_contig_all.iloc[0:6,0:2] - df_scaffold_Nxx=pd.DataFrame(scaffold_stats.items(), columns=['Nxx Level (%)', 'Length of Nxx Scaffold (bp)']) - df_scaffold_Nxx=df_scaffold_Nxx.iloc[31:55,0:2] - df_scaffold_Nxx=df_scaffold_Nxx.reset_index() -# print(df_scaffold_Nxx) - df_scaffold_NGxx=pd.DataFrame(scaffold_stats.items(), columns=['NGxx Level (%)', 'Length of NGxx Scaffold (bp)']) - df_scaffold_NGxx=df_scaffold_NGxx.iloc[79:104,0:2] - df_scaffold_NGxx=df_scaffold_NGxx.reset_index() -# print(df_scaffold_NGxx) - df_scaffold_N_NG=pd.concat([df_scaffold_Nxx,df_scaffold_NGxx], axis=1) - df_scaffold_N_NG=df_scaffold_N_NG.drop(df_scaffold_N_NG.columns[[0,3]], axis = 1) - df_scaffold_N_NG['Length of Nxx Scaffold (bp)']=df_scaffold_N_NG['Length of Nxx Scaffold (bp)'].astype('int64').apply('{:,}'.format) - df_scaffold_N_NG['Length of NGxx Scaffold (bp)']=df_scaffold_N_NG['Length of NGxx Scaffold (bp)'].astype('int64').apply('{:,}'.format) - # df_scaffold_N_NG=df_scaffold_N_NG.style.hide_index() - df_scaffold_Lxx=pd.DataFrame(scaffold_stats.items(), columns=['Lxx Level (%)', 'Count of scaffolds (for Lxx Level)']) - df_scaffold_Lxx=df_scaffold_Lxx.iloc[7:31,0:2] - df_scaffold_Lxx=df_scaffold_Lxx.reset_index() -# print(df_scaffold_Nxx) - df_scaffold_LGxx=pd.DataFrame(scaffold_stats.items(), columns=['LGxx Level (%)', 'Count of scaffolds (for LGxx Level)']) - df_scaffold_LGxx=df_scaffold_LGxx.iloc[55:79,0:2] - df_scaffold_LGxx=df_scaffold_LGxx.reset_index() -# print(df_scaffold_NGxx) - df_scaffold_L_LG=pd.concat([df_scaffold_Lxx,df_scaffold_LGxx], axis=1) - df_scaffold_L_LG=df_scaffold_L_LG.drop(df_scaffold_L_LG.columns[[0,3]], axis = 1) - df_scaffold_L_LG['Count of scaffolds (for Lxx Level)']=df_scaffold_L_LG['Count of scaffolds (for Lxx Level)'].astype('int64').apply('{:,}'.format) - df_scaffold_L_LG['Count of scaffolds (for LGxx Level)']=df_scaffold_L_LG['Count of scaffolds (for LGxx Level)'].astype('int64').apply('{:,}'.format) -################################################################################################################ - - contig_seq=pd.DataFrame(contig_stats.items(), columns=['Assembly:', naming]) - df_contig_top=contig_seq.iloc[0:7,0:2] - df_contig_top[naming]=df_contig_top[naming].astype('int64').apply('{:,}'.format) - df_contig_top[naming][2] = rounded_gc - # df_scaffold_top=df_scaffold_top.style.hide_index() -# df_scaffold_top[naming].round(decimals=0) - # df_contig_all=pd.DataFrame(data=contig_stats) - # df_contig_top=df_contig_all.iloc[0:6,0:2] - df_contig_Nxx=pd.DataFrame(contig_stats.items(), columns=['Nxx Level (%)', 'Length of Nxx contig (bp)']) - df_contig_Nxx=df_contig_Nxx.iloc[31:55,0:2] - df_contig_Nxx=df_contig_Nxx.reset_index() -# print(df_scaffold_Nxx) - df_contig_NGxx=pd.DataFrame(contig_stats.items(), columns=['NGxx Level (%)', 'Length of NGxx contig (bp)']) - df_contig_NGxx=df_contig_NGxx.iloc[79:104,0:2] - df_contig_NGxx=df_contig_NGxx.reset_index() -# print(df_scaffold_NGxx) - df_contig_N_NG=pd.concat([df_contig_Nxx,df_contig_NGxx], axis=1) - df_contig_N_NG=df_contig_N_NG.drop(df_contig_N_NG.columns[[0,3]], axis = 1) - df_contig_N_NG['Length of Nxx contig (bp)']=df_contig_N_NG['Length of Nxx contig (bp)'].astype('int64').apply('{:,}'.format) - df_contig_N_NG['Length of NGxx contig (bp)']=df_contig_N_NG['Length of NGxx contig (bp)'].astype('int64').apply('{:,}'.format) - # df_scaffold_N_NG=df_scaffold_N_NG.style.hide_index() - df_contig_Lxx=pd.DataFrame(contig_stats.items(), columns=['Lxx Level (%)', 'Count of contig (for Lxx Level)']) - df_contig_Lxx=df_contig_Lxx.iloc[7:31,0:2] - df_contig_Lxx=df_contig_Lxx.reset_index() -# print(df_scaffold_Nxx) - df_contig_LGxx=pd.DataFrame(contig_stats.items(), columns=['LGxx Level (%)', 'Count of contig (for LGxx Level)']) - df_contig_LGxx=df_contig_LGxx.iloc[55:79,0:2] - df_contig_LGxx=df_contig_LGxx.reset_index() -# print(df_scaffold_NGxx) - df_contig_L_LG=pd.concat([df_contig_Lxx,df_contig_LGxx], axis=1) - df_contig_L_LG=df_contig_L_LG.drop(df_contig_L_LG.columns[[0,3]], axis = 1) - df_contig_L_LG['Count of contig (for Lxx Level)']=df_contig_L_LG['Count of contig (for Lxx Level)'].astype('int64').apply('{:,}'.format) - df_contig_L_LG['Count of contig (for LGxx Level)']=df_contig_L_LG['Count of contig (for LGxx Level)'].astype('int64').apply('{:,}'.format) - # df_scaffold_L_LG=df_scaffold_L_LG.style.hide_index() - # print(df_contig_top) -# print(scaffold_stats) -# stat_output = {'Contig Stats': contig_stats, -# 'Scaffold Stats': scaffold_stats} -# print(json.dumps(stat_output, indent=2, sort_keys=False)) -# scaffold_out = {scaffold_stats} -# contig_out = {contig_stats} - - -# print(json.dumps(scaffold_stats, indent=2, sort_keys=False)) -# print(json.dumps(contig_stats, indent=2, sort_keys=False)) - # dict_writer = csv.DictWriter(stat_output, fieldnames=None, delimiter='\t') - # print(dict_writer) -# df_raw = pd.DataFrame.from_dict(data, orient='index') - # print(df_raw) -# with open('statdata.txt', 'w') as outfile: -# json.dump(scaffold_stats, outfile) -# with open('contigdata.txt', 'w') as out2file: -# json.dump(contig_stats, out2file) - # scaff = csv.writer(open(naming + "_scaffold_stats.tsv", "w"), delimiter='\t') - # for key, val in scaffold_stats.items(): - # scaff.writerow([key, val]) - # - # contig = csv.writer(open(naming + "_contig_stats.tsv", "w"), delimiter='\t') - # for key, val in contig_stats.items(): - # - with open(sys.argv[5], 'w') as outputfile: -# # print('#' + libraryName, file=outputfile) -# # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile) -# # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile) -# # print("Successfully Processed reads (Paired): " + successfull + " ( "+str(successfull_perc)+"%)", file=outputfile) - print(df_scaffold_top.to_string(index=False), file=outputfile) - print("", file=outputfile) - print(df_scaffold_N_NG.to_string(index=False), file=outputfile) - print("", file=outputfile) - print(df_scaffold_L_LG.to_string(index=False), file=outputfile) - - with open(sys.argv[6], 'w') as outputfile2: -# # print('#' + libraryName, file=outputfile) -# # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile) -# # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile) -# # print("Successfully Processed reads (Paired): " + successfull + " ( "+str(successfull_perc)+"%)", file=outputfile) - print(df_contig_top.to_string(index=False), file=outputfile2) - print("", file=outputfile2) - print(df_contig_N_NG.to_string(index=False), file=outputfile2) - print("", file=outputfile2) - print(df_contig_L_LG.to_string(index=False), file=outputfile2) - - # with open(sys.argv[4], 'w') as outputRst: - # print(tabulate(df_scaffold_top, headers='keys',tablefmt="rst", showindex=False), file=outputRst) - # print("", file=outputRst) - # print(tabulate(df_scaffold_N_NG, headers='keys',tablefmt="rst", showindex=False), file=outputRst) - # print("", file=outputRst) - # print(tabulate(df_scaffold_L_LG, headers='keys',tablefmt="rst", showindex=False), file=outputRst) - # print("", file=outputRst) - # # - # with open(sys.argv[5], 'w') as outputRst2: - # print(tabulate(df_contig_top, headers='keys',tablefmt="rst", showindex=False), file=outputRst2) - # print("", file=outputRst2) - # print(tabulate(df_contig_N_NG, headers='keys',tablefmt="rst", showindex=False), file=outputRst2) - # print("", file=outputRst2) - # print(tabulate(df_contig_L_LG, headers='keys',tablefmt="rst", showindex=False), file=outputRst2) - # print("", file=outputRst2) - - # with open(sys.argv[4], 'w') as outputRst: - # print(tabulate(df_scaffold_top, headers='keys',tablefmt="pipe", showindex=False), file=outputRst) - # print("", file=outputRst) - # print(tabulate(df_scaffold_N_NG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst) - # print("", file=outputRst) - # print(tabulate(df_scaffold_L_LG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst) - # print("", file=outputRst) - # # - # with open(sys.argv[5], 'w') as outputRst2: - # print(tabulate(df_contig_top, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2) - # print("", file=outputRst2) - # print(tabulate(df_contig_N_NG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2) - # print("", file=outputRst2) - # print(tabulate(df_contig_L_LG, headers='keys',tablefmt="pipe", showindex=False), file=outputRst2) - # print("", file=outputRst2) - # list_of_dfs=[df_scaffold_top,df_scaffold_N_NG,df_scaffold_L_LG] - # for df in list_of_dfs: - # with open('all_dfs.tsv','a') as f: - # df.to_csv(f) - # f.write("\n") -# with open("testok.tsv", 'w') as outputfile: -# # print('#' + libraryName, file=outputfile) -# # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile) -# # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile) -# # print("Successfully Processed reads (Paired): " + successfull + " ( "+str(successfull_perc)+"%)", file=outputfile) -# print(tabulate(loadinTop, headers='keys',tablefmt="rst", showindex=False), file=outputfile) -# # print('', file=outputfile) -# print(tabulate(result, headers='keys', tablefmt="psql", showindex=False), file=outputfile) -# print('', file=outputfile) diff --git a/scripts/compare_results/fullTable_heatmap_external_metrics.R b/scripts/compare_results/fullTable_heatmap_external_metrics.R index b57d3cd11e72a1e8b8f3e9df07fc35b30f291c30..ffde73fd555219127bcb9e44147949229f5320e0 100644 --- a/scripts/compare_results/fullTable_heatmap_external_metrics.R +++ b/scripts/compare_results/fullTable_heatmap_external_metrics.R @@ -1,3 +1,5 @@ +#!/usr/bin/env Rscript + suppressMessages(library(dplyr)) suppressMessages(library(formattable)) diff --git a/scripts/compare_results/fullTable_heatmap_internalComparison_metrics.R b/scripts/compare_results/fullTable_heatmap_internalComparison_metrics.R index edef08fd22f37f0b7ad16aa3464931703d9bf229..03e76e23d0d70d0da53deda4cb9f906d31cde345 100644 --- a/scripts/compare_results/fullTable_heatmap_internalComparison_metrics.R +++ b/scripts/compare_results/fullTable_heatmap_internalComparison_metrics.R @@ -1,3 +1,5 @@ +#!/usr/bin/env Rscript + suppressMessages(library(dplyr)) suppressMessages(library(formattable)) diff --git a/scripts/report/addFullTableForReport_all_metrics.py b/scripts/report/addFullTableForReport_all_metrics.py index 93a2f0516fc80f7af8d95f3d76688b0650658021..13cfe4dcd2f3c00e88b89eef0bfdefd77c05c9f3 100644 --- a/scripts/report/addFullTableForReport_all_metrics.py +++ b/scripts/report/addFullTableForReport_all_metrics.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import pandas as pd from tabulate import tabulate samples=pd.read_csv(snakemake.input.results, dtype=str, index_col=0, delim_whitespace=True, skip_blank_lines=True) diff --git a/scripts/report/makeAllMetricsTable.sh b/scripts/report/makeAllMetricsTable.sh index 2ade08bf19176aa747265ff2a6051ad2f9017228..f20f3dfdfa25fea81c53a4a28d408793cb07fabe 100755 --- a/scripts/report/makeAllMetricsTable.sh +++ b/scripts/report/makeAllMetricsTable.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + #usage: ASM ID, gfastats, qv, busco, completeness, output #asm_id="AssemblyX" diff --git a/scripts/report/makePDF_indivMD.py b/scripts/report/makePDF_indivMD.py index 8d5ffc98701ab8a9af4e7ef28f75fa8460fb4a43..31ecfc0760cb781382fba5d3d02613d937cace76 100644 --- a/scripts/report/makePDF_indivMD.py +++ b/scripts/report/makePDF_indivMD.py @@ -1,5 +1,7 @@ -import os -import numpy as np +#!/usr/bin/env python3 + +#import os +#import numpy as np import pandas as pd from tabulate import tabulate import re diff --git a/scripts/report/make_erga_assembly_report.py b/scripts/report/make_erga_assembly_report.py index 96c6ba13f7f788ed00f40c107aaf027beca2cad3..0745fc29b4c08db478586eae545904344a25b289 100644 --- a/scripts/report/make_erga_assembly_report.py +++ b/scripts/report/make_erga_assembly_report.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import os import sys import pandas as pd diff --git a/scripts/report/pretextMapsToMarkdown.py b/scripts/report/pretextMapsToMarkdown.py index bd87669d1752a36ed51cb5916bc72fa62069366f..c7929cdd19a9a3aa2e17f966123924633698ec35 100644 --- a/scripts/report/pretextMapsToMarkdown.py +++ b/scripts/report/pretextMapsToMarkdown.py @@ -1,6 +1,15 @@ +#!/usr/bin/env python3 + import shutil -shutil.copyfile(snakemake.input.PretextMap, snakemake.output.pretextCP2keyResults) + with open(snakemake.output.IndividualPretextMD, "w") as out: + shutil.copyfile(snakemake.input.PretextMap_asm1, snakemake.output.pretextCP2keyResults_asm1) print("", file=out) - print("###",snakemake.params.assemblyName," HiC Heatmap", file=out) - print("{ height=30% }", file=out) + print("###",snakemake.params.assemblyName," HiC Heatmap (primary assembly)", file=out) + print("{ height=30% }", file=out) + if snakemake.params.asm2provided: + shutil.copyfile(snakemake.input.PretextMap_asm2, snakemake.params.pretextCP2keyResults_asm2) + print("", file=out) + print("###",snakemake.params.assemblyName," HiC Heatmap (alternate assembly)", file=out) + print("{ height=30% }", file=out) +