Skip to content
Snippets Groups Projects
Commit 9a570550 authored by valegale's avatar valegale
Browse files

fixing longqc and nanoplot, making longqc optional, adding a parameter to...

fixing longqc and nanoplot, making longqc optional, adding a parameter to distinguish between pacbio and nanopore
parent 372377cd
No related branches found
No related tags found
2 merge requests!5fixing longqc and nanoplot, making longqc optional, adding a parameter to...,!4fixing longqc and nanoplot, making longqc optional, adding a parameter to...
......@@ -340,7 +340,7 @@ nanoplot_hifi:
longQC_hifi:
jobname: GEP.{rule}.{wildcards.sample}.{wildcards.readCounter}.{wildcards.smrtornot}
mem_mb: 12000
mem_mb: 96000
time: "04:00:05"
multiqc_hifi:
......
......@@ -139,7 +139,7 @@ elif set(['sample', 'hifi_reads', 'meryl_kmer_size','trimSMRTbell', 'fastQC']).i
samples=samples.set_index(['sample','readCounter'])
longqc_dir = "tools/LongQC"
sequencing_type = "pb-sequel" if config["use_pacbio"] else "ont-rapid"
ruleAllQCFiles=[]
### TODO change this column into a generic QC analysis (both nanoplot and longqc running)
......
### PATH TO WHERE YOU WANT YOUR RESULTS FOR THIS RUN STORED - WILL BE AUTOMATICALLY CREATED IF IT DOESN'T EXIST ###
Results: "/home/valentina/Desktop/test_gep/results_test_new"
Results: "/home/valentina/Desktop/test_gep/results_test_new_11"
### FULL PATH TO YOUR SAMPLESHEET ###
samplesTSV: "/home/valentina/Desktop/test_gep/build_hifi_example.tsv"
samplesTSV: "/home/valentina/Desktop/test_gep/build_nano_example.tsv"
### PATH TO DEFINE RESOURCES FILE - DO NOT CHANGE UNLESS YOU WISH TO USE IN A DIFFERENT LOCATION ####
resources: "configuration/define_resources.yaml"
### With EAR=True the ERGA Assembly Report is automatically generated from the analysis
EAR: False
smudgeplot: False
kmer_plot_flat: False
use_pacbio: True
run_longqc: True
### With EAR=True the ERGA Assembly Report is automatically generated from the analysis
EAR: False
......@@ -186,3 +186,8 @@ merylUnion_hifi:
mem_mb: 96000
time: "12:00:00"
threads: 12
# Resource request for the install_longqc job: memory, wall-clock time limit and thread count.
# NOTE(review): presumably looked up by rule name like the other entries in this file — confirm against the rule.
install_longqc:
mem_mb: 4000
time: "01:00:00"
threads: 1
......@@ -69,7 +69,7 @@ rule trimSMRTBellAdapters_hifi:
log:
os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/trimSMRTbell.{readCounter}.log")
priority:
15
2
conda:
os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml")
shell:
......@@ -110,11 +110,18 @@ rule fastQC_hifi:
(fastqc {input} -o {params.folder2out} -t {threads}) &> {log}
"""
def longqc_input(wildcards):
    """Select the long-read QC report that consuming rules should request.

    With ``run_longqc`` enabled in the config, the LongQC summary page is
    returned. When the user has disabled LongQC, the NanoPlot report is
    returned instead, so the LongQC output is never requested.

    The ``wildcards`` argument is accepted but not read here; the
    ``{sample}`` placeholder is left untouched in the returned pattern.
    """
    # Evaluate the switch first, then build the path under the results root.
    report_pattern = (
        "0_buildDatabases/{sample}/hifiReads/QC/longQC/web_summary.html"
        if config["run_longqc"]
        else "0_buildDatabases/{sample}/hifiReads/QC/nanoplot/NanoPlot-report.html"
    )
    return os.path.join(config['Results'], report_pattern)
rule multiQC_hifi:
input:
lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/fastqc/{readCounter}.{smrtornot}_fastqc.html"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], smrtornot=dictSamples[wildcards.sample][1]),
os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/nanoplot/NanoPlot-report.html"),
os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/longQC/web_summary.html")
longqc_input
params:
folder2qc=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/fastqc/"),
folder2qc2=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/nanoplot/"),
......@@ -145,7 +152,7 @@ rule merylCount_hifi:
log:
os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/meryl_hifi_count.{readCounter}.{kmer}.{smrtornot}.log")
priority:
10
1
conda:
os.path.join(workflow.basedir, "envs/MER_STATS.yaml")
shell:
......@@ -155,7 +162,9 @@ rule merylCount_hifi:
rule merylUnion_hifi:
input:
lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/merylDb/{readCounter}_hifi_dB.{smrtornot}.{kmer}.meryl/"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], kmer=dictSamples[wildcards.sample][0], smrtornot=dictSamples[wildcards.sample][1])
db = lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/merylDb/{readCounter}_hifi_dB.{smrtornot}.{kmer}.meryl/"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], kmer=dictSamples[wildcards.sample][0], smrtornot=dictSamples[wildcards.sample][1]),
output_nanoplot = os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/nanoplot/NanoPlot-report.html"),
output_longqc = longqc_input
params:
kmer = "{kmer}",
removeReadDIR_trimmed=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_trimReads/"),
......@@ -170,12 +179,12 @@ rule merylUnion_hifi:
log:
os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/meryl_hifi_combine.{sample}.{kmer}.log")
priority:
10
1
conda:
os.path.join(workflow.basedir, "envs/MER_STATS.yaml")
shell:
"""
(meryl union-sum {input} output {output}) &> {log}
(meryl union-sum {input.db} output {output}) &> {log}
rm -r {params.removeReadDIR_trimmed}
rm -r {params.removeReadDIR_unzipped}
"""
......@@ -194,6 +203,8 @@ rule NanoPlot:
time=resource['nanoplot_hifi']['time'],
log:
os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/nanoplot.log")
priority:
2
conda:
os.path.join(workflow.basedir, "envs/UNZIP_and_QC.yaml")
shell:
......@@ -206,7 +217,8 @@ rule longQC:
fastq = lambda wildcards: expand(os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/temp_trimReads/{readCounter}.{smrtornot}.fastq"), sample=wildcards.sample, readCounter=dictReadCounter[wildcards.sample], smrtornot=dictSamples[wildcards.sample][1]),
path_lonqqc = os.path.join(longqc_dir,"longQC.py")
params:
folder2out=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/longQC/")
folder2out=os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/longQC/"),
sequencing_type = sequencing_type
output:
os.path.join(config['Results'],"0_buildDatabases/{sample}/hifiReads/QC/longQC/web_summary.html")
threads:
......@@ -216,13 +228,14 @@ rule longQC:
time=resource['longQC_hifi']['time'],
log:
os.path.join(config['Results'], "0_buildDatabases/{sample}/hifiReads/logs/longQC.log")
priority:
2
conda:
os.path.join(workflow.basedir, "envs/LONGQC.yaml")
shell:
"""
## TODO making either pacbio or nanopore for -x
rmdir {params.folder2out}
(python tools/LongQC/longQC.py sampleqc -x pb-sequel -o {params.folder2out} {input.fastq}) &> {log}
rm -r {params.folder2out}
(python tools/LongQC/longQC.py sampleqc -x {params.sequencing_type} -o {params.folder2out} {input.fastq}) &> {log}
"""
rule install_longqc:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment