Commit 4a7ece1b authored by james94

quick fix

parent 1fb5c77a
@@ -32,28 +32,15 @@ if 'Library_R1' in samples.columns:
     samples = samples1.set_index('assemblyName')
     # print(samples)
     samples=samples.join(samples6['est_bp'])
-    print("The following assemblies will be evaluated: ", list(samples.index))
+    # print("The following assemblies will be evaluated: ", list(samples.index))
 elif 'preBuiltMerylDatabase' in samples.columns:
     samples['est_bp'] = samples['est_bp'].fillna(0)
     samples["est_bp"] = pd.to_numeric(samples["est_bp"])
-    print("The following assemblies will be evaluated: ", list(samples.index))
+    # print("The following assemblies will be evaluated: ", list(samples.index))
 else:
     raise ValueError('Sample Sheet structure not recognised. Please make sure you are using the correct sample sheet (prebuilt or non-prebuilt meryl db, etc)')
-# print(samples)
-# parse arguments
-# parser = argparse.ArgumentParser(description="Download busco lineage determined by prefix", formatter_class=argparse.RawTextHelpFormatter)
-# parser.add_argument("--l", "-lineage", required=True, type=str, help="Lineage prefix to download --> first match will be downloaded")
-# parser.add_argument("--o", "-outputPath", required=True, type=str, help="Location To Download busco databe into")
-# #parser.add_argument("--wkd", "-workingDir", required=True, type=str, help="Location To lineage Dir")
-# args = parser.parse_args()
-#os.path.join(workflow.basedir, "buscoLineage/" + config['busco5Lineage'] + "_odb10")
 args_o=os.path.join(workflow.basedir, "buscoLineage")
 args_l=config['busco5Lineage']
 checkLineagePath=args_o + "/" + args_l + "_odb10"
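For context, the unchanged `est_bp` handling in the prebuilt-meryl branch above is a standard pandas coercion pattern. A minimal standalone sketch, with a made-up two-row sample sheet standing in for the real one:

```python
import pandas as pd

# Hypothetical two-row sample sheet mirroring the columns used above.
samples = pd.DataFrame(
    {"assemblyName": ["asm1", "asm2"], "est_bp": [None, "1500000"]}
).set_index("assemblyName")

# Missing size estimates default to 0 before the column is coerced to numeric,
# matching the fillna(0) / pd.to_numeric calls in the Snakefile.
samples["est_bp"] = pd.to_numeric(samples["est_bp"].fillna(0))
print(list(samples["est_bp"]))  # [0, 1500000]
```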
@@ -65,14 +52,14 @@ if os.path.isdir(args_l) is True:
     # subprocess.call("ln -sf %s %s"%(args.l, args.l), shell=True)
     buscoDataBaseName=os.path.basename(args_l)
     buscoDataBaseName=buscoDataBaseName[:-6]
-    print("Lineage path given, basename is:", buscoDataBaseName)
+    # print("Lineage path given, basename is:", buscoDataBaseName)
 elif os.path.isdir(args_l) is False and os.path.isdir(checkLineagePath) is True:
     # subprocess.call("ln -sf %s %s"%(checkLineagePath, checkLineageDled), shell=True)
     buscoDataBaseName=os.path.basename(checkLineagePath)
     buscoDataBaseName=buscoDataBaseName[:-6]
-    print("Database already in buscoLineage directory, basename is:", buscoDataBaseName)
+    # print("Database already in buscoLineage directory, basename is:", buscoDataBaseName)
 else:
-    print("Database will be downloaded")
+    # print("Database will be downloaded")
     url = "https://busco-data.ezlab.org/v5/data/lineages/"
     html = urlopen(url).read()
     soup = BeautifulSoup(html, features="html.parser")
@@ -87,20 +74,19 @@ else:
     #identify the lineage file
     for line in text.splitlines():
         if line.startswith(args_l):
-            print(line)
+            # print(line)
             linID=line.split(" ")[0]
-            print(linID)
+            # print(linID)
             break
     if not linID==None:
         linLink=url+linID
-        print(linLink)
+        # print(linLink)
         subprocess.run("wget -q -P %s %s"%(outDir, linLink), shell=True, check=True)
         subprocess.run("tar -xvf %s*.tar.gz -C %s"%(args_o + "/" + args_l, outDir), shell=True, check=True)
         subprocess.run("rm %s_*.tar.gz"%(checkLineageDled), shell=True, check=True)
         buscoDataBaseName=args_l
-        print("Database success downloaded and placed in buscoLineage directory, basename is:", buscoDataBaseName)
     else:
-        print("Error - could not identify lineage please check %s for a correct prefix"%(url))
+        raise ValueError("Error - could not identify lineage please check busco site for a correct prefix")
 #Function to check if one of, or both the barcodeP5 and barcodeP7 fasta files are given by the user.
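For reference, the download branch above scrapes the BUSCO v5 lineage index and takes the first entry whose name starts with the configured prefix (and, after this commit, raises instead of merely printing when nothing matches). A standalone sketch of that matching step; the `bacteria` prefix is an arbitrary example standing in for `config['busco5Lineage']`:

```python
from urllib.request import urlopen
from bs4 import BeautifulSoup

url = "https://busco-data.ezlab.org/v5/data/lineages/"
text = BeautifulSoup(urlopen(url).read(), features="html.parser").get_text()

prefix = "bacteria"  # stands in for config['busco5Lineage']
linID = None
for line in text.splitlines():
    if line.startswith(prefix):
        # The first whitespace-separated token is the lineage tarball name.
        linID = line.split(" ")[0]
        break
if linID is None:
    raise ValueError("could not identify lineage; check %s for a correct prefix" % url)
print(url + linID)  # download URL handed to wget in the rule above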
...
 #
 #
 # import csv
 # aggregateResults = csv.writer(open(os.path.join(config['Results'],"{wildcards.sample}/keyResults/{wildcards.sample}_aggregate"), "w"), delimiter='\t')
 #
 #
 # rule checkOrDownload_BuscoLineage:
 #     input:
 #         config['buscoDatabase']
 #     output:
 #
 # rule trimAdapters:
 #     input:
 #         config['samplesTSV'],
@@ -30,7 +30,7 @@
 #         "../envs/pigz.yaml"
 #     shell:
 #         "(trim_galore -q 5 -j {threads} --dont_gzip --fastqc --length 65 -o {params.base} --paired {params.file}) &> {log}"
 #
 # rule multiqc:
 #     input:
 #         os.path.join(config['Results'],"{sample}/0_qualityControl/01_fastqc")
@@ -43,8 +43,8 @@
 #         "../envs/pigz.yaml"
 #     shell:
 #         "multiqc {input} -o {params.folder} -n {params.filename}"
 #
 #
 # rule combine_illuminaReads_R1:
 #     input:
 #         R1=os.path.join(config['Results'],"{sample}/0_qualityControl/01_fastqc")
@@ -87,7 +87,6 @@
 #     # mv {input.contig} {output.contig}
 #     # mv {input.scaffold} {output.scaffold}
 #     """
 rule busco5:
     input:
         assembly=lambda wildcards: samples.at[wildcards.sample, 'assembly_fasta'] \
@@ -108,7 +107,7 @@ rule busco5:
     conda:
         "../envs/busco_and_assembly.yaml"
     log:
-        os.path.join(config['Results'], "logs/1_busco5/{sample}_busco5.log")
+        os.path.join(config['Results'], "{sample}" + "/1_busco5/" + "{sample}" + "/logs/{sample}_busco5.log")
     priority:
         20
     shell:
@@ -230,6 +229,8 @@ rule merqury:
         # merylPath = os.path.join(workflow.basedir, "programs/meryl/build/bin:$PATH"),
         # merylPath = os.path.join(workflow.basedir, "programs/meryl-1.0/Linux-amd64/bin:$PATH"),
         outPath= os.path.join(config['Results'],"{sample}" + "/2_QVstats_merylAndMerqury"),
+        symlink_fasta=os.path.join(config['Results'], "{sample}" + "/{sample}.fasta"),
+        symlink_merylDB=directory(os.path.join(config['Results'], "{sample}" + "/combinedRead1Read2.21.meryl"))
         # export= os.path.join(workflow.basedir, "programs/merqury-1.1")
     threads:
         workflow.cores * 0.5
@@ -237,9 +238,7 @@ rule merqury:
         report(os.path.join(config['Results'],"{sample}" + "/2_QVstats_merylAndMerqury/" + "{sample}" + "_merq.qv"), caption="../report/merqury/qvScore.rst", category="QV Estimate", subcategory="{sample}"),
         report(os.path.join(config['Results'],"{sample}" + "/2_QVstats_merylAndMerqury/" + "{sample}" + "_merq.{sample}.spectra-cn.st.png"), caption="../report/merqury/spectra-cn_stacked.rst", category="Copy Number Spectra Plots", subcategory="{sample}"),
         report(os.path.join(config['Results'],"{sample}" + "/2_QVstats_merylAndMerqury/" + "{sample}" + "_merq.{sample}.spectra-cn.fl.png"), caption="../report/merqury/spectra-cn_filled.rst", category="Copy Number Spectra Plots", subcategory="{sample}"),
-        os.path.join(config['Results'], "{sample}" +"/2_QVstats_merylAndMerqury/combinedRead1Read2.21.hist"),
-        symlink_fasta=os.path.join(config['Results'], "{sample}" + "/{sample}.fasta"),
-        symlink_merylDB=directory(os.path.join(config['Results'], "{sample}" + "/combinedRead1Read2.21.meryl"))
+        os.path.join(config['Results'], "{sample}" +"/2_QVstats_merylAndMerqury/combinedRead1Read2.21.hist")
     log:
         os.path.join(config['Results'],"{sample}" + "/2_QVstats_merylAndMerqury/logs/{sample}_merqury.log")
     priority:
@@ -248,11 +247,11 @@ rule merqury:
         "../envs/merylMerq_2.yaml"
     shell:
         """
-        ln -s {input.assembly} {output.symlink_fasta}
-        ln -s {input.merylDB} {output.symlink_merylDB}
+        ln -s {input.assembly} {params.symlink_fasta}
+        ln -s {input.merylDB} {params.symlink_merylDB}
         cd {params.outPath}
         export OMP_NUM_THREADS={threads}
-        (merqury.sh {output.symlink_merylDB} {output.symlink_fasta} {params.outFile}) &> {log}
+        (merqury.sh {params.symlink_merylDB} {params.symlink_fasta} {params.outFile}) &> {log}
         """
         # bash {params.script} {input.merylDB} {params.symlink} {params.outFile}
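The substantive change in this rule is that the two symlinks moved from `output:` to `params:`. Snakemake tracks declared outputs for dependency resolution and deletes them when a job fails, so symlinks created as a side step can leave the rule looking incomplete or collide on re-runs; as plain params they are just scratch paths. A minimal sketch of the pattern, with rule, tool, and file names invented for illustration:

```python
rule example_symlink:
    input:
        asm="results/{sample}/assembly.fasta",
    output:
        qv="results/{sample}/qc/{sample}.qv",  # tracked by Snakemake, removed on failure
    params:
        link="results/{sample}/qc/{sample}.fasta",  # untracked scratch path, survives failures
    shell:
        """
        ln -sf {input.asm} {params.link}
        hypothetical_qc_tool {params.link} > {output.qv}
        """
```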
@@ -278,7 +277,7 @@ rule genomescope2:
         summary=report(os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/" + "{sample}" + "_summary.txt"), caption="../report/genomescope/summary.rst", category="Genomescope Profile", subcategory="{sample}"),
         logPlot=report(os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/" + "{sample}" + "_log_plot.png"), caption="../report/genomescope/logplot.rst", category="Genomescope Profile", subcategory="{sample}"),
         linearPlot=report(os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/" + "{sample}" + "_linear_plot.png"), caption="../report/genomescope/linearplot.rst", category="Genomescope Profile", subcategory="{sample}"),
-        estimatedSize=temp(os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/" + "{sample}" + "_sizeEst.txt"))
+        estimatedSize=os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/" + "{sample}" + "_sizeEst.txt")
     conda:
         "../envs/genomescope.yaml"
     shell:
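Dropping the `temp()` wrapper means the `_sizeEst.txt` file now persists after the run; `temp()` outputs are deleted by Snakemake once every consuming job has finished. For contrast, a minimal sketch with invented paths:

```python
rule temp_vs_kept:
    output:
        kept="results/{sample}/size_est.txt",          # persists after the workflow
        scratch=temp("results/{sample}/scratch.txt"),  # deleted once all consumers finish
    shell:
        "echo 1500000 > {output.kept} && echo tmp > {output.scratch}"
```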
@@ -300,8 +299,8 @@ rule assemblyStats:
         # report(os.path.join(config['Results'],"{sample}" + "/assemblyStats/{sample}_contig_stats.tsv"), caption="../report/assemblystats/{sample}_scaffold_rst.rst", category="Assembly Statistics", subcategory="{sample}"),
         scaffStats=report(os.path.join(config['Results'],"{sample}" + "/4_assemblyStats/{sample}_scaffold_stats.tsv"), caption="../report/assemblystats/stats.rst", category="Assembly Statistics", subcategory="{sample}"),
         contStats=report(os.path.join(config['Results'],"{sample}" + "/4_assemblyStats/{sample}_contig_stats.tsv"), caption="../report/assemblystats/stats.rst", category="Assembly Statistics", subcategory="{sample}"),
-        scaff_rst = temp(os.path.join(workflow.basedir, "report/assemblystats/{sample}_scaffold_rst.rst")),
-        contig_rst = temp(os.path.join(workflow.basedir, "report/assemblystats/{sample}_contig_rst.rst"))
+        #scaff_rst = temp(os.path.join(workflow.basedir, "report/assemblystats/{sample}_scaffold_rst.rst")),
+        # contig_rst = temp(os.path.join(workflow.basedir, "report/assemblystats/{sample}_contig_rst.rst"))
     params:
         script = os.path.join(workflow.basedir, "scripts/stats.py"),
         path = os.path.join(config['Results'], "{sample}" + "/4_assemblyStats/"),
@@ -310,8 +309,9 @@ rule assemblyStats:
             if wildcards.sample in samples.index else ''
     shell:
         """
-        python {params.script} {input.assembly} {input.estGenome} {params.filename} {output.scaff_rst} {output.contig_rst} {params.given_size} {output.scaffStats} {output.contStats}
+        python {params.script} {input.assembly} {input.estGenome} {params.filename} {params.given_size} {output.scaffStats} {output.contStats}
         """
+        # python {params.script} {input.assembly} {input.estGenome} {params.filename} {output.scaff_rst} {output.contig_rst} {params.given_size} {output.scaffStats} {output.contStats}
         # estGenomeSize=$( grep 'Genome Haploid Length' {input.estGenome} | awk {{'print $6'}} | sed 's/,//g' )
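With `{output.scaff_rst}` and `{output.contig_rst}` dropped from the command, `scripts/stats.py` now receives six positional arguments instead of eight, which is why the `sys.argv` indices shift in the stats.py hunks further down. A sketch of the new calling contract, with variable names chosen for illustration:

```python
import sys

# stats.py <assembly> <estGenome> <filename> <given_size> <scaffStats> <contStats>
assembly, est_genome_file, filename = sys.argv[1], sys.argv[2], sys.argv[3]
given_size = int(float(sys.argv[4]))  # was sys.argv[6] when the two rst paths were passed
scaffold_stats_path = sys.argv[5]     # was sys.argv[7]
contig_stats_path = sys.argv[6]       # was sys.argv[8]
```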
@@ -362,14 +362,14 @@ rule saveConfiguration_and_getKeyValues:
     params:
         # sampleSheet= config['samplesTSV'],
         # config=os.path.join(workflow.basedir, "configuration/config.yaml"),
-        resultsPath=os.path.join(config['Results'],"{sample}"+ "/5_Key_Results")
+        resultsPath=os.path.join(config['Results'],"{sample}"+ "/5_Key_Results"),
+        keyValues=os.path.join(config['Results'],"{sample}"+ "/5_Key_Results/results_withoutRowNames.tsv"),
+        rowNames=os.path.join(config['Results'],"{sample}"+ "/5_Key_Results/rowNames.tsv"),
         # masteraggregate=os.path.join(config['Results'],"{sample}"+ "/keyResults/aggregate.tsv")
     output:
         # newSampleSheet=os.path.join(config['Results'],"{sample}"+ "/keyResults/savedSampleSheet.tsv"),
         # newConfigFile=os.path.join(config['Results'],"{sample}"+ "/keyResults/savedConfig.yaml"),
         # merqLog=os.path.join(config['Results'],"logs/2_merylAndMerq/" + "{sample}" + "_merqury.spectra-cn.log"),
-        keyValues=temp(os.path.join(config['Results'],"{sample}"+ "/5_Key_Results/aggregate.tsv")),
-        rowNames=temp(os.path.join(config['Results'],"{sample}"+ "/5_Key_Results/aggregate_rowNames.tsv")),
         keyValuesWithRowNames=os.path.join(config['Results'],"{sample}"+ "/5_Key_Results/{sample}_aggregatedResults.tsv"),
         gscopeSum=os.path.join(config['Results'], "{sample}" +"/5_Key_Results/" + "{sample}" + "_gScope_summary.txt"),
         gscopeLog=os.path.join(config['Results'], "{sample}" +"/5_Key_Results/" + "{sample}" + "_gScope_log_plot.png"),
@@ -398,27 +398,29 @@ rule saveConfiguration_and_getKeyValues:
         cp {input.spectraFlat} {output.spectraFlat}
         cp {input.scaffStats} {output.scaffStats}
         cp {input.contStats} {output.contStats}
-        echo "{wildcards.sample}" >> {output.keyValues}
-        echo "$(grep 'Genome Haploid Length' {input.gscopeSum} | awk {{'print $6'}} )" >> {output.keyValues}
-        echo "$(grep 'Heterozygous (ab)' {input.gscopeSum} | awk {{'print $4'}})" >> {output.keyValues}
-        echo "$(awk {{'print $4'}} {input.qv})" >> {output.keyValues}
-        echo "$(grep 'N50' {input.scaffStats} | awk {{'print $2'}})" >> {output.keyValues}
-        echo "$(grep 'NG50' {input.scaffStats} | awk {{'print $4'}})" >> {output.keyValues}
-        echo "$(grep 'N95' {input.scaffStats} | awk {{'print $2'}})" >> {output.keyValues}
-        echo "$(grep 'NG95' {input.scaffStats} | awk {{'print $4'}})" >> {output.keyValues}
-        echo "$(grep 'N100' {input.scaffStats} | awk {{'print $2'}})" >> {output.keyValues}
-        echo "$(grep 'NG100' {input.scaffStats} | awk {{'print $4'}})" >> {output.keyValues}
-        echo "$(grep 'N50' {input.contStats} | awk {{'print $2'}})" >> {output.keyValues}
-        echo "$(grep 'NG50' {input.contStats} | awk {{'print $4'}})" >> {output.keyValues}
-        echo "$(grep 'total_bps' {input.scaffStats} | awk {{'print $3'}})" >> {output.keyValues}
-        echo "$(grep 'sequence_count' {input.scaffStats} | awk {{'print $2'}})" >> {output.keyValues}
-        echo "$(grep 'sequence_count' {input.contStats} | awk {{'print $2'}})" >> {output.keyValues}
-        echo "$(grep 'number_of_gaps' {input.contStats} | awk {{'print $2'}})" >> {output.keyValues}
-        dos2unix {output.keyValues}
-        echo -e "Assembly\nsize_estimate\nmax_heterozygosity\nqv_score\nScaffold_N50_length\nScaffold_NG50_length\nScaffold_N95_length\nScaffold_NG95_length\nScaffold_N100_length\nScaffold_NG100_length\nContig_N50_length\nContig_NG50_length\ntotal_num_bases\ntotal_num_scaffolds\ntotal_num_contigs\nnumber_of_gaps" > {output.rowNames}
-        paste -d'\t' {output.rowNames} {output.keyValues} > {output.keyValuesWithRowNames}
+        echo "{wildcards.sample}" >> {params.keyValues}
+        echo "$(grep 'Genome Haploid Length' {input.gscopeSum} | awk {{'print $6'}} )" >> {params.keyValues}
+        echo "$(grep 'Heterozygous (ab)' {input.gscopeSum} | awk {{'print $4'}})" >> {params.keyValues}
+        echo "$(awk {{'print $4'}} {input.qv})" >> {params.keyValues}
+        echo "$(grep 'N50' {input.scaffStats} | awk {{'print $2'}})" >> {params.keyValues}
+        echo "$(grep 'NG50' {input.scaffStats} | awk {{'print $4'}})" >> {params.keyValues}
+        echo "$(grep 'N95' {input.scaffStats} | awk {{'print $2'}})" >> {params.keyValues}
+        echo "$(grep 'NG95' {input.scaffStats} | awk {{'print $4'}})" >> {params.keyValues}
+        echo "$(grep 'N100' {input.scaffStats} | awk {{'print $2'}})" >> {params.keyValues}
+        echo "$(grep 'NG100' {input.scaffStats} | awk {{'print $4'}})" >> {params.keyValues}
+        echo "$(grep 'N50' {input.contStats} | awk {{'print $2'}})" >> {params.keyValues}
+        echo "$(grep 'NG50' {input.contStats} | awk {{'print $4'}})" >> {params.keyValues}
+        echo "$(grep 'total_bps' {input.scaffStats} | awk {{'print $3'}})" >> {params.keyValues}
+        echo "$(grep 'sequence_count' {input.scaffStats} | awk {{'print $2'}})" >> {params.keyValues}
+        echo "$(grep 'sequence_count' {input.contStats} | awk {{'print $2'}})" >> {params.keyValues}
+        echo "$(grep 'number_of_gaps' {input.contStats} | awk {{'print $2'}})" >> {params.keyValues}
+        dos2unix {params.keyValues}
+        echo -e "Assembly\nsize_estimate\nmax_heterozygosity\nqv_score\nScaffold_N50_length\nScaffold_NG50_length\nScaffold_N95_length\nScaffold_NG95_length\nScaffold_N100_length\nScaffold_NG100_length\nContig_N50_length\nContig_NG50_length\ntotal_num_bases\ntotal_num_scaffolds\ntotal_num_contigs\nnumber_of_gaps" > {params.rowNames}
+        paste -d'\t' {params.rowNames} {params.keyValues} > {output.keyValuesWithRowNames}
         printf "Evaluation complete for {wildcards.sample}, please find these results in {params.resultsPath} folder"
+        rm {params.rowNames} {params.keyValues}
         """
 #
 # cp {input.checkLog} {output.merqLog}
 #echo "$(grep 'Genome Haploid Length' {input.gscopeSum} | awk {{'print $6'}} | sed 's/,//g')" >> {output.keyValues} put this to remove commas
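The intermediate files here are now `params:` scratch paths, written line by line, joined column-wise with `paste`, and explicitly removed at the end, rather than `temp()` outputs left to Snakemake to clean up. In miniature, the label/value pairing the shell block produces looks like this (values invented):

```python
# Miniature Python equivalent of the rowNames + keyValues + paste pipeline.
labels = ["Assembly", "size_estimate", "qv_score"]  # subset of the real row names
values = ["asm1", "1,500,000", "35.2"]              # one extracted value per label
with open("asm1_aggregatedResults.tsv", "w") as out:
    for label, value in zip(labels, values):
        print(label, value, sep="\t", file=out)
```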
...
@@ -15,7 +15,7 @@ with open(sys.argv[2]) as f:
 #
 # size_bp = estSizeFile.readlines()
-estSize=int(float(sys.argv[6]))
+estSize=int(float(sys.argv[4]))
 if estSize == 0:
     size_bp = ff[0][:-1]
@@ -286,7 +286,7 @@ if __name__ == "__main__":
     # contig = csv.writer(open(naming + "_contig_stats.tsv", "w"), delimiter='\t')
     # for key, val in contig_stats.items():
     #
-    with open(sys.argv[7], 'w') as outputfile:
+    with open(sys.argv[5], 'w') as outputfile:
     # # print('#' + libraryName, file=outputfile)
     # # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile)
     # # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile)
@@ -297,7 +297,7 @@ if __name__ == "__main__":
         print("", file=outputfile)
         print(df_scaffold_L_LG.to_string(index=False), file=outputfile)
-    with open(sys.argv[8], 'w') as outputfile2:
+    with open(sys.argv[6], 'w') as outputfile2:
     # # print('#' + libraryName, file=outputfile)
     # # print("Total Reads Processed (Paired): " + total_processed + " ( 100 %)", file=outputfile)
     # # print("Discarded reads (Paired): " + discarded + " ( "+str(discarded_perc)+"%)", file=outputfile)
@@ -308,21 +308,21 @@ if __name__ == "__main__":
         print("", file=outputfile2)
         print(df_contig_L_LG.to_string(index=False), file=outputfile2)
-    with open(sys.argv[4], 'w') as outputRst:
-        print(tabulate(df_scaffold_top, headers='keys',tablefmt="rst", showindex=False), file=outputRst)
-        print("", file=outputRst)
-        print(tabulate(df_scaffold_N_NG, headers='keys',tablefmt="rst", showindex=False), file=outputRst)
-        print("", file=outputRst)
-        print(tabulate(df_scaffold_L_LG, headers='keys',tablefmt="rst", showindex=False), file=outputRst)
-        print("", file=outputRst)
+    # with open(sys.argv[4], 'w') as outputRst:
+    #     print(tabulate(df_scaffold_top, headers='keys',tablefmt="rst", showindex=False), file=outputRst)
+    #     print("", file=outputRst)
+    #     print(tabulate(df_scaffold_N_NG, headers='keys',tablefmt="rst", showindex=False), file=outputRst)
+    #     print("", file=outputRst)
+    #     print(tabulate(df_scaffold_L_LG, headers='keys',tablefmt="rst", showindex=False), file=outputRst)
+    #     print("", file=outputRst)
 #
-    with open(sys.argv[5], 'w') as outputRst2:
-        print(tabulate(df_contig_top, headers='keys',tablefmt="rst", showindex=False), file=outputRst2)
-        print("", file=outputRst2)
-        print(tabulate(df_contig_N_NG, headers='keys',tablefmt="rst", showindex=False), file=outputRst2)
-        print("", file=outputRst2)
-        print(tabulate(df_contig_L_LG, headers='keys',tablefmt="rst", showindex=False), file=outputRst2)
-        print("", file=outputRst2)
+    # with open(sys.argv[5], 'w') as outputRst2:
+    #     print(tabulate(df_contig_top, headers='keys',tablefmt="rst", showindex=False), file=outputRst2)
+    #     print("", file=outputRst2)
+    #     print(tabulate(df_contig_N_NG, headers='keys',tablefmt="rst", showindex=False), file=outputRst2)
+    #     print("", file=outputRst2)
+    #     print(tabulate(df_contig_L_LG, headers='keys',tablefmt="rst", showindex=False), file=outputRst2)
+    #     print("", file=outputRst2)
 # list_of_dfs=[df_scaffold_top,df_scaffold_N_NG,df_scaffold_L_LG]
 # for df in list_of_dfs:
 #     with open('all_dfs.tsv','a') as f:
...