Rename testDict -> dictSamples and d -> dictReadCounter in the Snakefile.

Review notes (text before the first "diff --git" line is ignored by patch tools):
  * The last hunk originally switched ruleAll to dictSamples.keys() while
    leaving the adjacent definition as testDict=samples.T.to_dict('list').
    Unless dictSamples is bound elsewhere in the evaluate branch (not visible
    in this patch), that raises NameError, so the definition is renamed here
    too. Confirm nothing else in the evaluate rules still reads testDict.
  * Line breaks, indentation and blank context lines were reconstructed from a
    whitespace-collapsed copy of this patch; check them against the Snakefile
    (or apply with whitespace-insensitive matching). The "index" hashes are
    those of the original commit.

diff --git a/Snakefile b/Snakefile
index 93ac2d3cb27b03aa53c6f2d31533c41e7e0a9983..018f339c95ddaed998fbbaa152ec913c9a307ced 100644
--- a/Snakefile
+++ b/Snakefile
@@ -63,8 +63,8 @@ if set(['sample', 'Library_R1', 'Library_R2', 'meryl_kmer_size', 'trim10X', 'tri
     samples['AdpaterTrimorNot'] = np.where(samples['trimAdapters'] == "True", "AdaptTrimmed", "notAdaptTrimmed")
     for i in samples['sample'].unique():
         d10xtrim[i] = [samples['10xtrimorNot'][j] for j in samples[samples['sample']==i].index]
-    testDict=samples[['sample','meryl_kmer_size', '10xtrimorNot','AdpaterTrimorNot']]
-    testDict=testDict.set_index(['sample']).T.to_dict('list')
+    dictSamples=samples[['sample','meryl_kmer_size', '10xtrimorNot','AdpaterTrimorNot']]
+    dictSamples=dictSamples.set_index(['sample']).T.to_dict('list')
 
     testDictQC=samples[['sample', '10xtrimorNot', 'AdpaterTrimorNot', 'fastQC']]
     testDictQC = testDictQC[testDictQC['fastQC'] == "True"]
@@ -80,9 +80,9 @@ if set(['sample', 'Library_R1', 'Library_R2', 'meryl_kmer_size', 'trim10X', 'tri
     drunQCtrim = {}
     for i in runQC['sample'].unique():
         drunQCtrim[i] = [runQC['readCounter'][j] for j in runQC[runQC['sample']==i].index]
-    d = {}
+    dictReadCounter = {}
     for i in samples['sample'].unique():
-        d[i] = [samples['readCounter'][j] for j in samples[samples['sample']==i].index]
+        dictReadCounter[i] = [samples['readCounter'][j] for j in samples[samples['sample']==i].index]
     dkmerSize = {}
     samples['gzipped_R1']=samples['Library_R1'].apply(gzipped_or_not)
     samples['gzipped_R2']=samples['Library_R2'].apply(gzipped_or_not)
@@ -109,9 +109,9 @@ if set(['sample', 'Library_R1', 'Library_R2', 'meryl_kmer_size', 'trim10X', 'tri
     ruleAllQCFiles=[]
     if samples['fastQC'].str.contains('True').any():
         ruleAllQCFiles=[expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/illuminaReads/05_multiqc/{sample}.{trim10x}.{trimAdapters}.multiqcReport.html"), sample=key, trim10x=value1, trimAdapters=value2) for key, [value1, value2, value3] in testDictQC.items()]
-    ruleAll=[expand(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/03_merylDb/complete_{sample}_illuminaDb.{trim10x}.{trimAdapters}.{kmer}.meryl"), sample=key, kmer=value1, trim10x=value2, trimAdapters=value3) for key, [value1, value2, value3] in testDict.items()]
+    ruleAll=[expand(os.path.join(config['Results'], "0_buildDatabases/{sample}/illuminaReads/03_merylDb/complete_{sample}_illuminaDb.{trim10x}.{trimAdapters}.{kmer}.meryl"), sample=key, kmer=value1, trim10x=value2, trimAdapters=value3) for key, [value1, value2, value3] in dictSamples.items()]
 
-elif set(['sample', 'hifi_reads', 'meryl_kmer_size' ,'trimSMRTbell', 'fastQC']).issubset(samples.columns):
+elif set(['sample', 'hifi_reads', 'meryl_kmer_size','trimSMRTbell', 'fastQC']).issubset(samples.columns):
     whichRule = "rules/build_hifi.smk"
     samples=samples.reset_index()
     samples['readCounter'] = samples.groupby(['sample']).cumcount()+1
@@ -121,8 +121,8 @@ elif set(['sample', 'hifi_reads', 'meryl_kmer_size' ,'trimSMRTbell', 'fastQC']).
     samplesDict=samples.set_index('sample').T.to_dict('list')
     samples=samples.reset_index()
 
-    testDict=samples[['sample','meryl_kmer_size', 'smrtornot']]
-    testDict=testDict.set_index(['sample']).T.to_dict('list')
+    dictSamples=samples[['sample','meryl_kmer_size', 'smrtornot']]
+    dictSamples=dictSamples.set_index(['sample']).T.to_dict('list')
 
     testDictQC=samples[['sample', 'smrtornot', 'fastQC']]
 
@@ -161,9 +161,9 @@ elif set(['sample', 'hifi_reads', 'meryl_kmer_size' ,'trimSMRTbell', 'fastQC']).
     dkmerDups=samples.drop_duplicates('meryl_kmer_size', keep='first')
     for i in samples['sample'].unique():
         dkmerSize[i] = [dkmerDups['meryl_kmer_size'][j] for j in dkmerDups[dkmerDups['sample']==i].index]
-    d = {}
+    dictReadCounter = {}
     for i in samples['sample'].unique():
-        d[i] = [samples['readCounter'][j] for j in samples[samples['sample']==i].index]
+        dictReadCounter[i] = [samples['readCounter'][j] for j in samples[samples['sample']==i].index]
 
     samples['gzipped_hifi']=samples['hifi_reads'].apply(gzipped_or_not)
 
@@ -180,7 +180,7 @@ elif set(['sample', 'hifi_reads', 'meryl_kmer_size' ,'trimSMRTbell', 'fastQC']).
 
     if samples['fastQC'].str.contains('True').any():
         ruleAllQCFiles=[expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/05_multiqc/{sample}.{smrtornot}.multiqcReport.html"), sample=key, smrtornot=value1) for key, [value1, value2] in testDictQC.items()]
-    ruleAll=[expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/03_merylDb/complete_hifi_{sample}_dB.{smrtornot}.{kmer}.meryl"), sample=key, kmer=value1, smrtornot=value2) for key, [value1, value2] in testDict.items()]
+    ruleAll=[expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/03_merylDb/complete_hifi_{sample}_dB.{smrtornot}.{kmer}.meryl"), sample=key, kmer=value1, smrtornot=value2) for key, [value1, value2] in dictSamples.items()]
 elif set(['ID', 'PRI_asm', 'ALT_asm', 'merylDB', 'merylDB_kmer', 'genomeSize', 'HiC_R1', 'HiC_R2']).issubset(samples.columns):
     whichRule = "rules/evaluate.smk"
     samples['genomeSize']=samples['genomeSize'].apply(genomeSize_auto_or_not)
@@ -239,7 +239,7 @@ elif set(['ID', 'PRI_asm', 'ALT_asm', 'merylDB', 'merylDB_kmer', 'genomeSize', '
-    testDict=samples.T.to_dict('list')
+    dictSamples=samples.T.to_dict('list')
 
     ruleAllQCFiles=[]
-    ruleAll=expand(os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_aggregatedResults.tsv"), asmID=list(testDict.keys())), os.path.join(config['Results'],"1_evaluation/finalResults/Combined_Results_FULLTABLE.tsv"), os.path.join(config['Results'],"1_evaluation/finalResults/FULL_Report_PDF.pdf"),os.path.join(config['Results'],"1_evaluation/finalResults/FULL_TABLE_PDF.pdf")
+    ruleAll=expand(os.path.join(config['Results'],"1_evaluation/{asmID}/06_keyResults/{asmID}_aggregatedResults.tsv"), asmID=list(dictSamples.keys())), os.path.join(config['Results'],"1_evaluation/finalResults/Combined_Results_FULLTABLE.tsv"), os.path.join(config['Results'],"1_evaluation/finalResults/FULL_Report_PDF.pdf"),os.path.join(config['Results'],"1_evaluation/finalResults/FULL_TABLE_PDF.pdf")
 
 else:
     raise ValueError('Sample Sheet for not recognised. Please make sure you are using the correct sample sheet')