Skip to content
Snippets Groups Projects
Commit 68d9c918 authored by dimit98's avatar dimit98
Browse files

Initial commit

parents
No related branches found
No related tags found
No related merge requests found
Showing
with 182 additions and 0 deletions
# Git submodule: the Specter repository is checked out under scripts/Specter.
[submodule "scripts/Specter"]
	path = scripts/Specter
	url = https://github.com/canzarlab/Specter
import os
import pandas as pd
# User settings; every config[...] lookup below reads from this file.
configfile: "config.yaml"

# Sample sheet (indexed by its "sample" column) and units table, both read
# from the paths given in the "data" section of the config.
samples = pd.read_table(config["data"]["samples"], index_col="sample")
units = pd.read_table(config["data"]["units"])

# Per-cluster enrichment plots are requested only when the units table does
# not have exactly three columns; otherwise no enrichment target is added.
# NOTE(review): presumably the column count distinguishes the input layouts
# - confirm against the units-file specification.
# NOTE(review): dynamic() has been superseded by checkpoints in newer
# Snakemake releases - confirm the Snakemake version this workflow targets.
enrichment_plots = (
    dynamic("figures/{cluster}_enrichment_vis.pdf")
    if len(units.columns) != 3
    else []
)


# Default target rule: collects the final figures of the workflow.
rule all:
    input:
        "figures/slingshot_data_vis.jpg",
        "figures/monocle_trajectory.jpg",
        enrichment_plots


# Rule modules, included in the order listed.
include: "rules/preprocessing.smk"
include: "rules/read_in.smk"
include: "rules/quality_control.smk"
include: "rules/normalization.smk"
include: "rules/visualization.smk"
include: "rules/clustering.smk"
include: "rules/slingshot_R.smk"
include: "rules/monocle2.smk"
include: "rules/differential_testing.smk"
include: "rules/gene_set_analysis.smk"
# User settings. Every value is given as a quoted string; per the notes below, "" leaves an optional parameter unset.
data:
samples: "" # path to the samples file
units: "" # path to the units file
general:
amount_of_hvgs: "" # number of highly variable genes used in the analysis
preprocessing:
path_to_bamtofastq: "" # path to the bamtofastq program
path_to_cellranger: "" # path to the cellranger program
path_to_ref: "" # path to the reference genome used
quality_control:
cells: # filters applied to cells
upper_quantile_counts: "0.95" # maximum counts per cell; set either this or max_counts and leave the other as ""
max_counts: ""
lower_quantile_counts: "0.05" # minimum counts per cell; set either this or min_counts and leave the other as ""
min_counts: ""
lower_quantile_genes: "0.05" # minimum number of genes per cell; set either this or min_genes and leave the other as ""
min_genes: ""
mt_frac: "" # maximum proportion of mitochondrial genes per cell
genes:
min_cells: "" # minimum number of cells a gene has to be expressed in
downsampling:
downsampling_method: "normal" # options: "sphetcher" or "normal"; "normal" = no downsampling
path_to_sphetcher: "" # path to sphetcher's src folder (only used if downsampling_method = "sphetcher")
sketch_size: "" # number of cells after downsampling (only used if downsampling_method = "sphetcher")
cell_cycle_scoring:
ref_genes: "" # used for visualization of cell cycle effects; tsv file with 2 columns (s and g2m) defining cell cycle genes; OPTIONAL
clustering:
celltypes_markergenes: "" # known cell types with their marker genes; tsv file with 2 columns (celltype and markergenes): celltype defines a specific cell type, markergenes lists its marker genes
genes_to_vis: "" # genes that are visualized across the clustering; OPTIONAL
# NOTE(review): "subclustering" is used both as this flag and as a section name further down - confirm they live in
# different mappings, since most YAML parsers silently keep only the last of two duplicate keys in one mapping.
subclustering: "" # options: "" or "True"; "" = no automatic subclustering
clustering_resolution: "0.5" # sensitivity parameter for the Louvain algorithm
cluster_method: "louvain" # options: "specter" or "louvain"; algorithm used for clustering
specter: # parameters only used if clustering:cluster_method = "specter"
number_of_clusters: "8" # number of clusters in the final clustering
ensemble_size: "200" # Specter parameter; meaning not documented here - TODO confirm against the Specter documentation
mingamma: "0.1" # Specter parameter; meaning not documented here - TODO confirm against the Specter documentation
subclustering:
subclustering_resolution: "0.2" # sensitivity parameter for the Louvain algorithm
names_for_unannotated: "" # names to use for the unannotated clusters; OPTIONAL
further_subclusterings: "" # clusters to subcluster; OPTIONAL
trajectory_inference:
clusters_to_include: "" # clusters that are used for trajectory inference; all clusters are used if the parameter is ""
trajectory_start: "" # cluster that is the starting point of the trajectory; a random cluster is chosen if the parameter is ""
trajectory_ending: "" # cluster that is the end point of the trajectory; a random cluster is chosen if the parameter is ""
differential_testing:
clusters: "" # clusters used for differential testing; to test a subcluster write cluster-0 instead of cluster,0; all clusters are used if the parameter is ""
DE_threshold: "0.01" # threshold for p-values of differentially expressed genes
gene_set_enrichment_analysis:
enrichment_threshold: "0.05" # threshold for p-values of enriched GO terms
organism: "" # analysed organism
# Conda environment: Seurat (pinned to 3.2.0) together with
# SingleCellExperiment, MAST, scater and the HDF5 readers.
channels:
  - bioconda
  - conda-forge
  - defaults
dependencies:
  - bioconductor-singlecellexperiment
  - bioconductor-mast
  - r-seurat==3.2.0
  - bioconductor-scater
  - r-hdf5r
  - bioconductor-rhdf5
# Conda environment: dplyr and ggplot2 from the "r" channel.
channels:
  - r
dependencies:
  - r-dplyr
  - r-ggplot2
# Conda environment: gprofiler-official with pandas and matplotlib.
channels:
  - bioconda
  - conda-forge
  - anaconda
dependencies:
  - gprofiler-official
  - pandas
  - matplotlib
# Conda environment: Monocle with Biobase, RColorBrewer and base R.
channels:
  - bioconda
  - conda-forge
dependencies:
  - bioconductor-monocle
  - bioconductor-biobase
  - r-rcolorbrewer
  - r-base
# Conda environment: scanpy stack with anndata and umap-learn pinned to the
# versions listed below.
channels:
  - bioconda
  - conda-forge
  - defaults
dependencies:
  - anndata==0.6.22.post1
  - seaborn
  - umap-learn=0.3.9
  - scipy
  - scanpy
  - python-igraph
# Conda environment: scanpy stack with anndata, umap-learn and h5py pinned to
# the versions listed below.
channels:
  - bioconda
  - conda-forge
  - defaults
dependencies:
  - anndata==0.6.22.post1
  - seaborn
  - umap-learn=0.3.9
  - scipy
  - scanpy
  - python-igraph
  - h5py==2.9.0
# Conda environment: scran on R pinned to 3.5.1.
channels:
  - conda-forge
  - bioconda
  - r
dependencies:
  - bioconductor-scran
  - r==3.5.1
  - r-base
# Conda environment: slingshot with SingleCellExperiment (pinned to 1.10.1),
# Seurat, scater, clusterExperiment, gam and the HDF5 readers.
channels:
  - r
  - bioconda
  - conda-forge
dependencies:
  - r-rcolorbrewer
  - bioconductor-slingshot
  - bioconductor-singlecellexperiment==1.10.1
  - r
  - r-seurat
  - bioconductor-scater
  - bioconductor-clusterexperiment
  - r-gam
  - r-base
  - r-hdf5r
  - bioconductor-rhdf5
Visualisierung der annotierten Trajektorie.
Visualisierung der annotierten Trajektorie auf Basis der nicht batch-korrigierten Daten.
Visualisierung von dimensionsreduzierten Daten mit Hilfe von diffusion maps.
Visualisierung der sog. Diffusion Pseudotime, welche die Daten in einer Abfolge, basierend auf transkriptionellen Ähnlichkeiten zwischen den Zellen, visualisiert.
Visualisierung der sog. Diffusion Pseudotime, welche die Daten in einer Abfolge, basierend auf transkriptionellen Ähnlichkeiten zwischen den Zellen, visualisiert.
Visualisierung von dimensionsreduzierten Daten mit Hilfe des Drawgraph-Algorithmus.
Visualisierung der 10 am signifikantesten angereicherten GO-Terme für das Cluster: {{snakemake.wildcards.cluster}}. Die GO-Terme sind auf der y-Achse dargestellt, während auf der x-Achse das Verhältnis zwischen differentiell exprimierten Genen eines GO-Terms und nicht differentiell exprimierten Genen dieses GO-Terms dargestellt wird. Farblich markiert ist der p-Wert, und die Größe der Kreise markiert die Anzahl differentiell exprimierter Gene.
Abgebildet ist die Dispersion der Gene in normalisierter und nicht-normalisierter Form in Abhängigkeit von der durchschnittlichen Expression der Gene. Stark variable Gene sind dabei durch die dunkle Farbe hervorgehoben.
Visualisierung der am stärksten exprimierten Gene in jedem Cluster.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment