library(SingleR)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
##
## Attaching package: 'sp'
## The following object is masked from 'package:IRanges':
##
## %over%
##
## Attaching package: 'SeuratObject'
## The following object is masked from 'package:SummarizedExperiment':
##
## Assays
## The following object is masked from 'package:GenomicRanges':
##
## intersect
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following object is masked from 'package:IRanges':
##
## intersect
## The following object is masked from 'package:S4Vectors':
##
## intersect
## The following object is masked from 'package:BiocGenerics':
##
## intersect
## The following objects are masked from 'package:base':
##
## intersect, t
##
## Attaching package: 'Seurat'
## The following object is masked from 'package:SummarizedExperiment':
##
## Assays
library(celldex)
##
## Attaching package: 'celldex'
## The following objects are masked from 'package:SingleR':
##
## BlueprintEncodeData, DatabaseImmuneCellExpressionData,
## HumanPrimaryCellAtlasData, ImmGenData, MonacoImmuneData,
## MouseRNAseqData, NovershternHematopoieticData
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following object is masked from 'package:matrixStats':
##
## count
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(pheatmap)
#your seurat object from Firday
sobj <- readRDS("../bc_scRNAseq/output/bc_tutorial.rds")
check your seurat object
sobj
## An object of class Seurat
## 18054 features across 3498 samples within 1 assay
## Active assay: RNA (18054 features, 1000 variable features)
## 3 layers present: counts, data, scale.data
## 2 dimensional reductions calculated: pca, umap
We need to extract the normalization data using
GetAssayData()
sobj_count <- GetAssayData(sobj, layer = "data")
str(sobj_count)
## Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## ..@ i : int [1:25868904] 0 1 6 7 8 9 10 13 14 16 ...
## ..@ p : int [1:3499] 0 8583 15839 25530 35501 44226 52914 62061 69292 79025 ...
## ..@ Dim : int [1:2] 18054 3498
## ..@ Dimnames:List of 2
## .. ..$ : chr [1:18054] "SAMD11" "NOC2L" "KLHL17" "PLEKHN1" ...
## .. ..$ : chr [1:3498] "AACAATGTGCTCCGAG-1" "AACACCATTCGCATAC-1" "AACACGTTGATACCGC-1" "AACACTCGTGAGCTTC-1" ...
## ..@ x : num [1:25868904] 0.727 1.664 0.538 3.008 0.305 ...
## ..@ factors : list()
Get the Blueprint and Encode reference data from celldex package:
ref <- celldex::BlueprintEncodeData()
ref
## class: SummarizedExperiment
## dim: 19859 259
## metadata(0):
## assays(1): logcounts
## rownames(19859): TSPAN6 TNMD ... LINC00550 GIMAP1-GIMAP5
## rowData names(0):
## colnames(259): mature.neutrophil
## CD14.positive..CD16.negative.classical.monocyte ...
## epithelial.cell.of.umbilical.artery.1
## dermis.lymphatic.vessel.endothelial.cell.1
## colData names(3): label.main label.fine label.ont
pred <- SingleR(test=sobj_count, ref=ref, labels=ref$label.main)
pred %>% head()
## DataFrame with 6 rows and 4 columns
## scores labels delta.next
## <matrix> <character> <numeric>
## AACAATGTGCTCCGAG-1 0.324793:0.418997:0.323649:... Epithelial cells 0.06219701
## AACACCATTCGCATAC-1 0.345131:0.400975:0.324331:... Epithelial cells 0.14348376
## AACACGTTGATACCGC-1 0.368779:0.458628:0.328615:... Mesangial cells 0.12183469
## AACACTCGTGAGCTTC-1 0.376044:0.458448:0.340945:... Epithelial cells 0.00207031
## AACAGGAATTCTGTGA-1 0.403179:0.433582:0.298729:... Mesangial cells 0.09215977
## AACAGGATGCTGGCAT-1 0.365389:0.400806:0.345930:... Epithelial cells 0.09739350
## pruned.labels
## <character>
## AACAATGTGCTCCGAG-1 Epithelial cells
## AACACCATTCGCATAC-1 Epithelial cells
## AACACGTTGATACCGC-1 Mesangial cells
## AACACTCGTGAGCTTC-1 Epithelial cells
## AACAGGAATTCTGTGA-1 Mesangial cells
## AACAGGATGCTGGCAT-1 Epithelial cells
unique(pred$labels)
## [1] "Epithelial cells" "Mesangial cells" "Fibroblasts" "Astrocytes"
## [5] "Macrophages" "Adipocytes" "DC"
Next, we put the prediction results back to our seurat object for visualization
sobj$labels <- pred[match(rownames(sobj@meta.data), rownames(pred)), 'labels']
sobj@meta.data %>% head()
## orig.ident nCount_RNA nFeature_RNA percent.mt
## AACAATGTGCTCCGAG-1 Ovarian_Carcinoma 28045 8583 2.824033
## AACACCATTCGCATAC-1 Ovarian_Carcinoma 21851 7256 2.663494
## AACACGTTGATACCGC-1 Ovarian_Carcinoma 39421 9691 3.000939
## AACACTCGTGAGCTTC-1 Ovarian_Carcinoma 44116 9971 2.550095
## AACAGGAATTCTGTGA-1 Ovarian_Carcinoma 31401 8725 2.184644
## AACAGGATGCTGGCAT-1 Ovarian_Carcinoma 32380 8688 1.639901
## RNA_snn_res.0.5 seurat_clusters labels
## AACAATGTGCTCCGAG-1 0 0 Epithelial cells
## AACACCATTCGCATAC-1 0 0 Epithelial cells
## AACACGTTGATACCGC-1 0 0 Mesangial cells
## AACACTCGTGAGCTTC-1 0 0 Epithelial cells
## AACAGGAATTCTGTGA-1 1 1 Mesangial cells
## AACAGGATGCTGGCAT-1 3 3 Epithelial cells
umap_pred <- DimPlot(
sobj, reduction = 'umap', group.by = 'labels', label = TRUE)
umap_pred
Compare with your previous data
umap_sobj <- DimPlot(
sobj, reduction = 'umap', group.by = 'seurat_clusters', label = TRUE)
umap_sobj | umap_pred
Plot the SingleR assignment score across all cell-label combination
plotScoreHeatmap(pred)
Load the human markers from CellMarker 2.0, file includes cell markers of different cell types from different tissue in human
human_marker <- read.csv("../bc_scRNAseq/data/CellMakerv2_Human.csv")
str(human_marker)
## 'data.frame': 60877 obs. of 20 variables:
## $ species : chr "Human" "Human" "Human" "Human" ...
## $ tissue_class : chr "Abdomen" "Abdomen" "Abdomen" "Abdomen" ...
## $ tissue_type : chr "Abdomen" "Abdomen" "Abdomen" "Abdomen" ...
## $ uberonongology_id: chr "UBERON_0000916" "UBERON_0000916" "UBERON_0000916" "UBERON_0000916" ...
## $ cancer_type : chr "Normal" "Normal" "Normal" "Normal" ...
## $ cell_type : chr "Normal cell" "Normal cell" "Normal cell" "Normal cell" ...
## $ cell_name : chr "Macrophage" "Macrophage" "Macrophage" "Macrophage" ...
## $ cellontology_id : chr "CL_0000235" "CL_0000235" "CL_0000235" "CL_0000235" ...
## $ marker : chr "MERTK" "CD16" "CD206" "CRIg" ...
## $ Symbol : chr "MERTK" "FCGR3A" "MRC1" "VSIG4" ...
## $ GeneID : int 10461 2215 4360 11326 9332 2167 NA 7350 930 2210 ...
## $ Genetype : chr "protein_coding" "protein_coding" "protein_coding" "protein_coding" ...
## $ Genename : chr "MER proto-oncogene, tyrosine kinase" "Fc fragment of IgG receptor IIIb" "mannose receptor C-type 1" "V-set and immunoglobulin domain containing 4" ...
## $ UNIPROTID : chr "Q12866" "O75015" "P22897" "Q9Y279" ...
## $ technology_seq : chr "None" "None" "None" "None" ...
## $ marker_source : chr "Experiment" "Experiment" "Experiment" "Experiment" ...
## $ PMID : int 31982413 31982413 31982413 31982413 31982413 32355218 32355218 32355218 NA NA ...
## $ Title : chr "Peritoneal Level of CD206 Associates With Mortality and an Inflammatory Macrophage Phenotype in Patients With D"| __truncated__ "Peritoneal Level of CD206 Associates With Mortality and an Inflammatory Macrophage Phenotype in Patients With D"| __truncated__ "Peritoneal Level of CD206 Associates With Mortality and an Inflammatory Macrophage Phenotype in Patients With D"| __truncated__ "Peritoneal Level of CD206 Associates With Mortality and an Inflammatory Macrophage Phenotype in Patients With D"| __truncated__ ...
## $ journal : chr "Gastroenterology" "Gastroenterology" "Gastroenterology" "Gastroenterology" ...
## $ year : int 2020 2020 2020 2020 2020 2020 2020 2020 NA NA ...
Select marker for ovary from the tissue class column
ovary_marker <- human_marker %>%
filter(species=="Human" & tissue_class=="Ovary") %>%
select(c(3,5,6,7,8,9,16))
ovary_marker %>% head()
## tissue_type cancer_type cell_type cell_name cellontology_id
## 1 Ovary Normal Normal cell Thecal cell <NA>
## 2 Ovary Normal Normal cell Epithelial cell CL_0000066
## 3 Corpus luteum Normal Normal cell Endothelial cell CL_0000115
## 4 Ovary Normal Normal cell Cancer cell CL_0001064
## 5 Ovary Normal Normal cell Germ cell CL_0000586
## 6 Ovary Normal Normal cell Germ cell CL_0000586
## marker marker_source
## 1 Aminopeptidase-N Experiment
## 2 FOLR1 Experiment
## 3 CD34 Experiment
## 4 FAS Experiment
## 5 MATER Experiment
## 6 BMP15 Experiment
Get markers table from your seurat object
sobj.markers <- FindAllMarkers(sobj, only.pos = TRUE)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
sobj.markers %>% head()
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## BHLHE41 6.203160e-239 0.9382200 0.999 0.817 1.119919e-234 0 BHLHE41
## SLC35E1 1.098709e-190 0.8376728 0.999 0.817 1.983610e-186 0 SLC35E1
## FNTA 1.824515e-187 0.9101818 0.991 0.838 3.293980e-183 0 FNTA
## PAX2 3.317360e-186 1.5397125 0.914 0.394 5.989162e-182 0 PAX2
## PERP 5.841617e-183 0.9194699 0.997 0.796 1.054646e-178 0 PERP
## TUBB2B 7.196608e-179 1.2094813 0.968 0.624 1.299276e-174 0 TUBB2B
Combine CellMarker with our markers sobj.markers
merged_cellmarker <- sobj.markers %>%
filter(p_val_adj<0.05 & avg_log2FC>1 & pct.1 > 0.8 & pct.2 < 0.5) %>%
left_join(ovary_marker[,c(3,4,6)], join_by(gene==marker), relationship = "many-to-many")
merged_cellmarker %>% head()
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene cell_type
## 1 3.317360e-186 1.539712 0.914 0.394 5.989162e-182 0 PAX2 <NA>
## 2 1.009558e-133 1.212609 0.841 0.379 1.822657e-129 0 TNNT1 <NA>
## 3 1.716705e-129 1.217553 0.869 0.423 3.099339e-125 0 MUC15 <NA>
## 4 6.979399e-129 1.098619 0.915 0.497 1.260061e-124 0 DPEP3 <NA>
## 5 3.019701e-125 1.327811 0.844 0.420 5.451768e-121 0 AK4 <NA>
## 6 3.639086e-118 1.319720 0.815 0.401 6.570005e-114 0 LRRTM1 <NA>
## cell_name
## 1 <NA>
## 2 <NA>
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
Let’s try to see if there are fibroblast and astrocytes
merged_cellmarker %>%
filter(cluster==2 & cell_type=="Cancer cell")
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene cell_type
## 1 2.453948e-299 4.336628 0.976 0.399 4.430358e-295 2 PDGFRA Cancer cell
## 2 5.237721e-244 3.946544 0.936 0.483 9.456182e-240 2 CAV1 Cancer cell
## 3 6.911268e-239 3.360288 0.924 0.366 1.247760e-234 2 ACTA2 Cancer cell
## 4 6.911268e-239 3.360288 0.924 0.366 1.247760e-234 2 ACTA2 Cancer cell
## cell_name
## 1 Fibroblast
## 2 Fibroblast
## 3 Mesenchymal cell
## 4 Fibroblast
#select(c(7,8,9))
From the results, we found PDGFRA, CAV1, and ACTA2 pointing to Fibroblast in cancer cells
Check again using FeaturePlot()
genes <- c("PDGFRA", "CAV1", "ACTA2")
FeaturePlot(sobj, features = genes)
rename cluster 2 to Fibroblasts using RenameIdents()
sobj_new <- RenameIdents(sobj, '2' = 'Fibroblasts')
DimPlot(sobj_new, reduction = 'umap', label = TRUE)
Can we distinguish cluster 7?
merged_cellmarker %>%
filter(cluster==7 & cell_type=="Cancer cell") %>%
dplyr::select(c(7,8,9)) %>%
str()
## 'data.frame': 2 obs. of 3 variables:
## $ gene : chr "ACTA2" "ACTA2"
## $ cell_type: chr "Cancer cell" "Cancer cell"
## $ cell_name: chr "Mesenchymal cell" "Fibroblast"
BiocManager::install("clusterProfiler")
## Bioconductor version 3.19 (BiocManager 1.30.23), R 4.4.0 (2024-04-24)
## Warning: package(s) not installed when version(s) same as or greater than current; use
## `force = TRUE` to re-install: 'clusterProfiler'
## Old packages: 'DBI', 'dqrng', 'highr', 'knitr', 'quantreg', 'RcppArmadillo',
## 'SparseM'
BiocManager::install("org.Hs.eg.db")
## Bioconductor version 3.19 (BiocManager 1.30.23), R 4.4.0 (2024-04-24)
## Warning: package(s) not installed when version(s) same as or greater than current; use
## `force = TRUE` to re-install: 'org.Hs.eg.db'
## Old packages: 'DBI', 'dqrng', 'highr', 'knitr', 'quantreg', 'RcppArmadillo',
## 'SparseM'
BiocManager::install("DOSE")
## Bioconductor version 3.19 (BiocManager 1.30.23), R 4.4.0 (2024-04-24)
## Warning: package(s) not installed when version(s) same as or greater than current; use
## `force = TRUE` to re-install: 'DOSE'
## Old packages: 'DBI', 'dqrng', 'highr', 'knitr', 'quantreg', 'RcppArmadillo',
## 'SparseM'
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
##
library(clusterProfiler)
##
## clusterProfiler v4.12.0 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:stats':
##
## filter
library(DOSE)
## DOSE v3.30.1 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
marker_7_2 <- FindMarkers(sobj, ident.1 = 7, ident.2 = 2)
marker_7_2_sig <- marker_7_2 %>%
filter(abs(avg_log2FC)>=1 & p_val_adj<0.05 & pct.2<0.4)
str(marker_7_2_sig)
## 'data.frame': 728 obs. of 5 variables:
## $ p_val : num 2.30e-91 2.52e-64 4.13e-56 5.60e-47 1.10e-45 ...
## $ avg_log2FC: num 5.44 3.73 3.64 2.95 2.91 ...
## $ pct.1 : num 0.918 0.762 0.714 0.748 0.755 0.673 0.844 0.537 0.789 0.592 ...
## $ pct.2 : num 0.114 0.102 0.112 0.163 0.217 0.122 0.378 0.07 0.283 0.1 ...
## $ p_val_adj : num 4.15e-87 4.54e-60 7.46e-52 1.01e-42 1.99e-41 ...
log2 <- marker_7_2_sig$avg_log2FC
names(log2) <- rownames(marker_7_2_sig)
log2_sort <- sort(log2, decreasing = TRUE)
log2_sort[1:5]
## BLK COMP IGLC1 IGHA1 AGMAT
## 5.505561 5.440098 4.727742 4.710259 4.306363
Run gene set enrichment analysis:
gse <- gseGO(geneList = log2_sort,
ont="ALL",
keyType="SYMBOL",
nPerm=10000,
OrgDb = "org.Hs.eg.db")
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...
Visualize the results
dotplot(gse)
Put the identity back
sobj_new <- RenameIdents(sobj,
'7' = 'Mesenchymal Cells',
'2' = 'Fibroblasts')
DimPlot(sobj_new, reduction = 'umap', label = TRUE)