Skip to content
Snippets Groups Projects
Commit 1fb5c77a authored by james94's avatar james94
Browse files

Handle pre-built

parent 1653442b
No related branches found
No related tags found
No related merge requests found
......@@ -32,10 +32,12 @@ if 'Library_R1' in samples.columns:
samples = samples1.set_index('assemblyName')
# print(samples)
samples=samples.join(samples6['est_bp'])
print(list(samples.index))
print("The following assemblies will be evaluated: ", list(samples.index))
elif 'preBuiltMerylDatabase' in samples.columns:
print(list(samples.index))
samples['est_bp'] = samples['est_bp'].fillna(0)
samples["est_bp"] = pd.to_numeric(samples["est_bp"])
print("The following assemblies will be evaluated: ", list(samples.index))
else:
raise ValueError('Sample Sheet structure not recognised. Please make sure you are using the correct sample sheet (prebuilt or non-prebuilt meryl db, etc)')
......
......@@ -239,7 +239,7 @@ rule merqury:
report(os.path.join(config['Results'],"{sample}" + "/2_QVstats_merylAndMerqury/" + "{sample}" + "_merq.{sample}.spectra-cn.fl.png"), caption="../report/merqury/spectra-cn_filled.rst", category="Copy Number Spectra Plots", subcategory="{sample}"),
os.path.join(config['Results'], "{sample}" +"/2_QVstats_merylAndMerqury/combinedRead1Read2.21.hist"),
symlink_fasta=os.path.join(config['Results'], "{sample}" + "/{sample}.fasta"),
symlink_merylDB=os.path.join(config['Results'], "{sample}" + "/combinedRead1Read2.21.meryl")
symlink_merylDB=directory(os.path.join(config['Results'], "{sample}" + "/combinedRead1Read2.21.meryl"))
log:
os.path.join(config['Results'],"{sample}" + "/2_QVstats_merylAndMerqury/logs/{sample}_merqury.log")
priority:
......@@ -272,7 +272,7 @@ rule genomescope2:
check2=os.path.join(config['Results'],"{sample}" + "/2_QVstats_merylAndMerqury/" + "{sample}" + "_merq.{sample}.spectra-cn.st.png")
params:
outFolder=os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/"),
cpHist=os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/combinedRead1Read2.21.histo"),
cpHist=os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/combinedRead1Read2_10k.21.histo"),
findGscope=os.path.join(workflow.basedir)
output:
summary=report(os.path.join(config['Results'], "{sample}" +"/3_genomescopeProfile/" + "{sample}" + "_summary.txt"), caption="../report/genomescope/summary.rst", category="Genomescope Profile", subcategory="{sample}"),
......@@ -283,12 +283,12 @@ rule genomescope2:
"../envs/genomescope.yaml"
shell:
"""
expand -t 1 {input.hist} > {params.cpHist}
head -n 10000 {input.hist} > {params.cpHist}
export genomescope2script=$(find {params.findGscope} -name genomescope2)
Rscript $genomescope2script -i {params.cpHist} -o {params.outFolder} -k 21 -n {wildcards.sample}
grep "Genome Haploid Length" {output.summary} | awk {{'print $6'}} | sed 's/,//g'> {output.estimatedSize}
"""
# expand -t 1 {input.hist} > {params.cpHist}
rule assemblyStats:
input:
......@@ -438,7 +438,7 @@ rule aggregateAllAssemblies:
"""
cp {input.sampleSheet} {output.newSampleSheet}
cp {input.config} {output.newConfigFile}
echo -e "AAssembly\nsize_estimate\nmax_heterozygosity\nqv_score\nScaffold_N50_length\nScaffold_NG50_length\nScaffold_N95_length\nScaffold_NG95_length\nScaffold_N100_length\nScaffold_NG100_length\nContig_N50_length\nContig_NG50_length\ntotal_num_bases\ntotal_num_scaffolds\ntotal_num_contigs\nnumber_of_gaps" | \
echo -e "Assembly\nsize_estimate\nmax_heterozygosity\nqv_score\nScaffold_N50_length\nScaffold_NG50_length\nScaffold_N95_length\nScaffold_NG95_length\nScaffold_N100_length\nScaffold_NG100_length\nContig_N50_length\nContig_NG50_length\ntotal_num_bases\ntotal_num_scaffolds\ntotal_num_contigs\nnumber_of_gaps" | \
paste -d'\t' - {input.allResults} | \
awk -F'\t' '{{s="";for (i=1;i<=NF;i+=2) {{s=s?s FS $i:$i}} print s}}' | \
column -t > {output.results}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment