Commit 5d44efe6 authored by james94

comments

parent d57f9d47
@@ -13,28 +13,34 @@ import argparse, subprocess
 container: "docker://gepdocker/gep_dockerimage:latest"
 configfile: "configuration/config.yaml"
+### LOAD IN RESOURCES CONFIG ###
 with open(config['resources'], 'r') as f:
     resource = yaml.safe_load(f)
+### GET BASENAME FOR READS WILDCARDS ###
 def getBasename4Reads(path):
     base=os.path.basename(path)
     return os.path.splitext(base)[0]
+### CHECK IF INPUT IS GZIPPED ###
 def gzipped_or_not(path):
     trueORfalse=path.endswith('.gz')
     return trueORfalse
+### CHECK IF HIC AND/OR ALT_ASM FILES ARE GIVEN, OR VALUE IS NONE ###
 def FILE_present_or_not(path):
     if path == 'None':
         return False
     return True
+### CHECK IF GENOME_SIZE IS PROVIDED, OR VALUE IS AUTO ###
 def genomeSize_auto_or_not(given_size):
     if given_size == 'auto':
         return 0
     return given_size
 def addUniqueLibraryID(library):
     string2Add=library + "_1"
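For illustration only, not part of the commit: a quick sketch of what the helper functions above return for some made-up inputs; the file paths and genome-size value are hypothetical.

# Assumes the helper functions defined above are in scope (e.g. this snippet
# sits below them in the same file).
print(getBasename4Reads("/data/reads/sampleA_R1.fastq"))   # -> 'sampleA_R1'
print(gzipped_or_not("/data/reads/sampleA_R1.fastq.gz"))   # -> True
print(FILE_present_or_not("None"))                         # -> False (no file supplied)
print(genomeSize_auto_or_not("auto"))                      # -> 0 (size will be estimated)
print(genomeSize_auto_or_not("1.2g"))                      # -> '1.2g' (user-supplied size)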
@@ -51,18 +57,10 @@ if set(['sample', 'Library_R1', 'Library_R2', 'meryl_kmer_size', 'trim10X', 'tri
     samples['readCounter'] = samples.groupby(['sample']).cumcount()+1
     samples['readCounter'] = samples['readCounter'].astype(str)
     samples['readCounter'] = samples['sample'] + "_LibraryPair" + samples['readCounter']
-    # samples=samples.reset_index()
-    # trim10x_table = samples[samples['trim10X'] == "True"]
-    # trim10x_table=trim10x_table.set_index(['sample','readCounter'])
-    # notrim10xwithAdapt= samples[(samples['trim10X'] == "False") & (samples['trimAdapters'] == "True") ]
-    # notrim10xwithAdapt=notrim10xwithAdapt.set_index(['sample','readCounter'])
-    # notrim10xorAdapt= samples[(samples['trim10X'] == "False") & (samples['trimAdapters'] == "False") ]
-    # notrim10xorAdapt=notrim10xorAdapt.set_index(['sample','readCounter'])
-    # d10xtrim = {}
     samples['10xtrimorNot'] = np.where(samples['trim10X'] == "True", "10xTrimmed", "not10xTrimmed")
     samples['AdpaterTrimorNot'] = np.where(samples['trimAdapters'] == "True", "AdaptTrimmed", "notAdaptTrimmed")
-    # for i in samples['sample'].unique():
-    # d10xtrim[i] = [samples['10xtrimorNot'][j] for j in samples[samples['sample']==i].index]
     dictSamples=samples[['sample','meryl_kmer_size', '10xtrimorNot','AdpaterTrimorNot']]
     dictSamples=dictSamples.set_index(['sample']).T.to_dict('list')
@@ -72,14 +70,7 @@ if set(['sample', 'Library_R1', 'Library_R2', 'meryl_kmer_size', 'trim10X', 'tri
     testDictQC=testDictQC.set_index(['sample']).T.to_dict('list')
     trimAdapters = samples[samples['trimAdapters'] == "True"]
-    # dAdaptertrim = {}
-    # for i in trimAdapters['sample'].unique():
-    # dAdaptertrim[i] = [trimAdapters['readCounter'][j] for j in trimAdapters[trimAdapters['sample']==i].index]
-    #
-    # runQC = samples[samples['fastQC'] == "True"]
-    # drunQCtrim = {}
-    # for i in runQC['sample'].unique():
-    # drunQCtrim[i] = [runQC['readCounter'][j] for j in runQC[runQC['sample']==i].index]
     dictReadCounter = {}
     for i in samples['sample'].unique():
         dictReadCounter[i] = [samples['readCounter'][j] for j in samples[samples['sample']==i].index]
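To make the table-to-dictionary handling in this Illumina branch easier to follow, here is a self-contained sketch using an invented two-library sample sheet; the sample name and column values are hypothetical but follow the schema checked above.

import numpy as np
import pandas as pd

# Hypothetical Illumina sample sheet: one sample, two library pairs.
samples = pd.DataFrame({
    'sample':          ['speciesA', 'speciesA'],
    'meryl_kmer_size': [21, 21],
    'trim10X':         ['True', 'False'],
    'trimAdapters':    ['True', 'True'],
})
# Number the libraries per sample, as in the Snakefile above.
samples['readCounter'] = (samples.groupby(['sample']).cumcount() + 1).astype(str)
samples['readCounter'] = samples['sample'] + "_LibraryPair" + samples['readCounter']
samples['10xtrimorNot'] = np.where(samples['trim10X'] == "True", "10xTrimmed", "not10xTrimmed")
# Map each sample to its per-library identifiers (equivalent to the loop above).
dictReadCounter = {s: samples.loc[samples['sample'] == s, 'readCounter'].tolist()
                   for s in samples['sample'].unique()}
print(dictReadCounter)  # {'speciesA': ['speciesA_LibraryPair1', 'speciesA_LibraryPair2']}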
@@ -118,9 +109,7 @@ elif set(['sample', 'hifi_reads', 'meryl_kmer_size','trimSMRTbell', 'fastQC']).i
     samples['readCounter'] = samples['readCounter'].astype(str)
     samples['readCounter'] = samples['sample'] + "_Library" + samples['readCounter']
     samples['smrtornot'] = np.where(samples['trimSMRTbell'] == "True", "smrtTrimmed", "notsmrtTrimmed")
-    # samplesDict=samples.set_index('sample').T.to_dict('list')
-    # samples=samples.reset_index()
     dictSamples=samples[['sample','meryl_kmer_size', 'smrtornot']]
     dictSamples=dictSamples.drop_duplicates('sample', keep='first')
     dictSamples=dictSamples.set_index(['sample']).T.to_dict('list')
@@ -129,42 +118,10 @@ elif set(['sample', 'hifi_reads', 'meryl_kmer_size','trimSMRTbell', 'fastQC']).i
     testDictQC=samples[['sample', 'smrtornot', 'fastQC']]
     testDictQC = testDictQC[testDictQC['fastQC'] == "True"]
-    # testDictQC=testDictQC.drop_duplicates('sample', keep='first')
     testDictQC=testDictQC.set_index(['sample']).T.to_dict('list')
-    # runQC = samples[samples['fastQC'] == "True"]
-    #
-    # drunQCtrim = {}
-    # for i in runQC['sample'].unique():
-    # drunQCtrim[i] = [runQC['readCounter'][j] for j in runQC[runQC['sample']==i].index]
-    #
-    # runQC=runQC.set_index(['sample','readCounter'])
-    #
-    #
-    # trimSMRTbell=samples.loc[samples['trimSMRTbell'] == "True"]
-    # dtrimSMRTbell = {}
-    # for i in trimSMRTbell['sample'].unique():
-    # dtrimSMRTbell[i] = [trimSMRTbell['readCounter'][j] for j in trimSMRTbell[trimSMRTbell['sample']==i].index]
-    # trimSMRTbell=trimSMRTbell.set_index(['sample','readCounter'])
-    # notrimSMRTbell = samples[samples['trimSMRTbell'] == "False"]
-    # notrimSMRTbell=notrimSMRTbell.set_index(['sample','readCounter'])
-    # dsmrtornot = {}
-    # dfsmrtornot=samples.drop_duplicates('smrtornot', keep='first')
-    #
-    # for i in samples['sample'].unique():
-    # dsmrtornot[i] = [dfsmrtornot['smrtornot'][j] for j in dfsmrtornot[dfsmrtornot['sample']==i].index]
-    # for i in samples['sample'].unique():
-    # dsmrtornot[i] = [samples['smrtornot'][j] for j in samples[samples['sample']==i].index]
-    #
-    #
-    # dkmerSize = {}
-    # dkmerDups=samples.drop_duplicates('meryl_kmer_size', keep='first')
-    # for i in samples['sample'].unique():
-    # dkmerSize[i] = [dkmerDups['meryl_kmer_size'][j] for j in dkmerDups[dkmerDups['sample']==i].index]
     dictReadCounter = {}
     for i in samples['sample'].unique():
         dictReadCounter[i] = [samples['readCounter'][j] for j in samples[samples['sample']==i].index]
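A similar sketch for the HiFi branch, showing how drop_duplicates plus set_index(...).T.to_dict('list') collapses the per-library table into one entry per sample; the sample sheet here is invented.

import numpy as np
import pandas as pd

# Hypothetical HiFi sample sheet: one sample, two libraries.
samples = pd.DataFrame({
    'sample':          ['speciesA', 'speciesA'],
    'meryl_kmer_size': [21, 21],
    'trimSMRTbell':    ['True', 'True'],
})
samples['smrtornot'] = np.where(samples['trimSMRTbell'] == "True", "smrtTrimmed", "notsmrtTrimmed")
dictSamples = samples[['sample', 'meryl_kmer_size', 'smrtornot']]
dictSamples = dictSamples.drop_duplicates('sample', keep='first')   # one row per sample
dictSamples = dictSamples.set_index(['sample']).T.to_dict('list')   # sample -> [kmer size, smrtornot]
print(dictSamples)  # {'speciesA': [21, 'smrtTrimmed']}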
@@ -244,11 +201,13 @@ elif set(['ID', 'ASM_LEVEL', 'PRI_asm', 'ALT_asm', 'merylDB', 'merylDB_kmer', 'g
     dictSamples=samples.T.to_dict('list')
     ruleAllQCFiles=[]
-    ruleAll=expand(os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_aggregatedResults.tsv"), asmID=list(dictSamples.keys())),\
-            os.path.join(config['Results'],"1_evaluation/finalResults/Combined_Results_FULLTABLE.tsv"), \
+    ruleAll=expand(os.path.join(config['Results'],"1_evaluation/{asmID}/KEY_RESULTS/{asmID}_aggregatedResults.tsv"), asmID=list(dictSamples.keys())),\
+            os.path.join(config['Results'],"1_evaluation/finalResults/TABLE_OF_RESULTS.tsv"), \
             os.path.join(config['Results'],"1_evaluation/finalResults/FULL_REPORT.pdf")
+    ### DOWNLOAD BUSCO LINEAGE (IF IT DOESN'T ALREADY EXIST) ####
     args_o=os.path.join(workflow.basedir, "buscoLineage")
     args_l=config['busco5Lineage']
     checkLineagePath=args_o + "/" + args_l + "_odb10"
@@ -298,11 +257,6 @@ elif set(['ID', 'ASM_LEVEL', 'PRI_asm', 'ALT_asm', 'merylDB', 'merylDB_kmer', 'g
     else:
         raise ValueError("Error - could not identify lineage please check busco site for a correct prefix")
-    # os.path.join(config['Results'],"1_evaluation/finalResults/FULL_Report_PDF.pdf"), \
-    # os.path.join(config['Results'],"1_evaluation/finalResults/FULL_TABLE_PDF.pdf"), \
-    # os.path.join(config['Results'],"1_evaluation/finalResults/FullTableCOLOUREDheatmap.pdf"), \
-    # os.path.join(config['Results'],"1_evaluation/finalResults/FullTableGRADIENTheatmap.pdf")
 else:
     raise ValueError('Sample Sheet for not recognised. Please make sure you are using the correct sample sheet')
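For context on the renamed target paths above (KEY_RESULTS and TABLE_OF_RESULTS.tsv): Snakemake's expand() fills the {asmID} wildcard once per assembly ID. A minimal sketch with a made-up results directory and made-up IDs:

import os
from snakemake.io import expand  # available where the snakemake package is installed

results_dir = "results"                   # stand-in for config['Results']
dictSamples = {"asm1": [], "asm2": []}    # stand-in for the parsed sample sheet
targets = expand(os.path.join(results_dir,
                              "1_evaluation/{asmID}/KEY_RESULTS/{asmID}_aggregatedResults.tsv"),
                 asmID=list(dictSamples.keys()))
print(targets)
# ['results/1_evaluation/asm1/KEY_RESULTS/asm1_aggregatedResults.tsv',
#  'results/1_evaluation/asm2/KEY_RESULTS/asm2_aggregatedResults.tsv']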
@@ -314,13 +268,13 @@ else:
 if "Results" not in config:
     config["Results"] = "results"
+report: "report/workflow.rst"
 include: whichRule
-inputs = ruleAllQCFiles, ruleAll
+final_target_outputs = ruleAllQCFiles, ruleAll
 rule all:
     input:
-        inputs
+        final_target_outputs
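The renamed final_target_outputs variable feeds the standard Snakemake aggregator pattern: "rule all" lists the files the workflow must produce, and Snakemake works backwards from them. A minimal, self-contained Snakefile sketch of that pattern (hypothetical paths, not part of this repository):

# Minimal illustration of the "rule all" target pattern.
final_target_outputs = ["results/example/aggregatedResults.tsv"]

rule all:
    input:
        final_target_outputs

rule make_example_result:
    output:
        "results/example/aggregatedResults.tsv"
    shell:
        "touch {output}"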