You can view an example script for this workflow by running the following command:
The example marker data come from the pbmc3k dataset:
library(easybio)
head(pbmc.markers)
#> p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
#> RPS12 1.273332e-143 0.7387061 1.000 0.991 1.746248e-139 0 RPS12
#> RPS6 6.817653e-143 0.6934523 1.000 0.995 9.349729e-139 0 RPS6
#> RPS27 4.661810e-141 0.7372604 0.999 0.992 6.393206e-137 0 RPS27
#> RPL32 8.158412e-138 0.6266075 0.999 0.995 1.118845e-133 0 RPL32
#> RPS14 5.177478e-130 0.6336957 1.000 0.994 7.100394e-126 0 RPS14
#> RPS25 3.244898e-123 0.7689940 0.997 0.975 4.450053e-119 0 RPS25
(marker <- matchCellMarker2(marker = pbmc.markers, n = 50, spc = 'Human')[, head(.SD, 2), by=cluster])
#> Key: <cluster>
#> cluster cell_name uniqueN N
#> <fctr> <char> <int> <int>
#> 1: 0 Naive CD8+ T cell 6 34
#> 2: 0 Naive T(Th0) cell 3 32
#> 3: 1 Monocyte 9 133
#> 4: 1 Macrophage 8 63
#> 5: 2 Regulatory T(Treg) cell 11 148
#> 6: 2 T cell 11 82
#> 7: 3 B cell 9 317
#> 8: 3 Naive B cell 6 33
#> 9: 4 T cell 15 104
#> 10: 4 Natural killer cell 17 99
#> 11: 5 Macrophage 4 34
#> 12: 5 Monocyte 3 10
#> 13: 6 Natural killer cell 14 196
#> 14: 6 Cytotoxic T cell 4 24
#> 15: 7 Plasmacytoid dendritic cell(pDC) 8 42
#> 16: 7 Dendritic cell 6 38
#> 17: 8 Megakaryocyte 9 52
#> 18: 8 Endothelial cell 6 41
#> ordered_symbol orderN
#> <list> <list>
#> 1: CCR7,LEF1,CD8B,MAL,NELL2,TSHZ2 14,12, 2, 2, 2, 2
#> 2: CCR7,LEF1,LRRN3 23, 8, 1
#> 3: CD14,S100A8,S100A9,S100A12,FCGR1A,MS4A6A,... 82,22,15, 5, 4, 2,...
#> 4: CD14,FCGR1A,CCL2,PLA2G7,RNASE1,S100A8,... 46, 6, 2, 2, 2, 2,...
#> 5: FOXP3,IL2RA,CTLA4,TNFRSF4,TNFRSF18,ICOS,... 55,45,22, 7, 6, 4,...
#> 6: CD2,CTLA4,FOXP3,IL2RA,CD40LG,CCR6,... 32,12, 8, 7, 6, 4,...
#> 7: CD79A,CD19,MS4A1,FCER2,TCL1A,IGLL5,... 102, 97, 97, 6, 5, 3,...
#> 8: TCL1A,MS4A1,CD19,FCER2,CD79A,PCDH9 13, 6, 5, 5, 3, 1
#> 9: CD8A,CD8B,GZMK,TIGIT,CCL5,GZMA,... 38,10, 7, 7, 6, 6,...
#> 10: NKG7,KLRB1,GZMA,CCL5,CD160,CD8A,... 50, 9, 6, 5, 5, 3,...
#> 11: C1QA,C1QB,MS4A7,MS4A4A 13,10, 7, 4
#> 12: MS4A7,C1QB,C1QA 7,2,1
#> 13: NCAM1,GNLY,KLRF1,GZMB,NCR1,XCL1,... 61,42,25,16,14,11,...
#> 14: PRF1,GZMB,GNLY,FGFBP2 9,8,6,1
#> 15: CLEC4C,LILRA4,SCT,LAMP5,LRRC26,SERPINF1,... 19,16, 2, 1, 1, 1,...
#> 16: FCER1A,CLEC10A,LILRA4,FLT3,CD1E,CLEC4C 16,11, 4, 3, 2, 2
#> 17: PPBP,PF4,ITGA2B,GP9,MYL9,TUBB1,... 15,12, 9, 4, 4, 3,...
#> 18: CLDN5,ESAM,GNG11,LCN2,SERPINE1,SPARC 36, 1, 1, 1, 1, 1
#> markerWith
#> <list>
#> 1: LEF1,CCR7,MAL,LEF1,TSHZ2,CCR7,...
#> 2: CCR7,CCR7,LRRN3,CCR7,CCR7,CCR7,...
#> 3: S100A9,S100A8,CD14,S100A8,S100A9,CD14,...
#> 4: CD14,CD14,CD14,CD14,CD14,CD14,...
#> 5: CTLA4,TNFRSF4,IL2RA,TNFRSF18,FOXP3,FOXP3,...
#> 6: IL2RA,IL2RA,CD40LG,CD40LG,CD2,CTLA4,...
#> 7: MS4A1,CD19,CD79A,CD79A,CD19,MS4A1,...
#> 8: MS4A1,TCL1A,PCDH9,CD79A,TCL1A,FCER2,...
#> 9: CD8A,CD8A,CD8A,GZMA,CD8B,CD8A,...
#> 10: NKG7,NKG7,KLRB1,CD8A,NKG7,NKG7,...
#> 11: MS4A7,C1QB,C1QA,MS4A7,C1QA,C1QB,...
#> 12: MS4A7,MS4A7,MS4A7,MS4A7,MS4A7,C1QB,...
#> 13: GNLY,NCAM1,NCAM1,KLRF1,GNLY,NCAM1,...
#> 14: PRF1,GZMB,FGFBP2,GNLY,GZMB,PRF1,...
#> 15: CLEC4C,LILRA4,SERPINF1,CLEC4C,LILRA4,LILRA4,...
#> 16: FCER1A,CLEC10A,FCER1A,LILRA4,CLEC10A,FCER1A,...
#> 17: PF4,PPBP,SPARC,PPBP,ITGA2B,TUBB1,...
#> 18: CLDN5,CLDN5,CLDN5,CLDN5,SPARC,CLDN5,...
Explanation:
The function matches the top 50 genes in each cluster with the CellMarker2 database.
It then calculates the cell count for each matched cell type and prints the result, helping you identify possible cell types for the cluster.
To annotate, you can simply use the top-matched cell type:
cl2cell <- marker[, head(.SD, 1), by = .(cluster)]
cl2cell <- setNames(cl2cell[["cell_name"]], cl2cell[["cluster"]])
cl2cell
#> 0 1
#> "Naive CD8+ T cell" "Monocyte"
#> 2 3
#> "Regulatory T(Treg) cell" "B cell"
#> 4 5
#> "T cell" "Macrophage"
#> 6 7
#> "Natural killer cell" "Plasmacytoid dendritic cell(pDC)"
#> 8
#> "Megakaryocyte"
Visualize marker dot plots for similar clusters:
cls <- list(
c(1, 5, 7),
c(8),
c(3),
c(0,2, 4, 6)
)
dotplotList <- plotSeuratDot(seuratObject, cls, marker = pbmc.markers, n = 50, spc = 'Human', topcellN = 2)
Explanation:
This function searches for potential cell markers in the result of `matchCellMarker2`.
It then uses `Seurat::DotPlot` to generate the corresponding dot plots for similar clusters.
Construct a named vector for annotation:
cl2cell <- finsert(
expression(
c(1, 5) == "Monocyte",
c(7) == "DC",
c(8) == "megakaryocyte",
c(3) == "B.cell",
c(0, 2) == "Naive.CD8.T.cell",
c(4) == "Cytotoxic.T.Cell",
c(6) == "Natural.killer.cell",
), len = 9)
cl2cell
#> 0 1 2
#> "Naive.CD8.T.cell" "Monocyte" "Naive.CD8.T.cell"
#> 3 4 5
#> "B.cell" "Cytotoxic.T.Cell" "Monocyte"
#> 6 7 8
#> "Natural.killer.cell" "DC" "megakaryocyte"
You can also directly retrieve markers:
get_marker(spc = 'Human', cell = c('Monocyte', 'Neutrophil'), number = 5, min.count = 1)
#> $Monocyte
#> [1] "CD14" "FCGR3A" "LYZ" "S100A8" "FCN1"
#>
#> $Neutrophil
#> [1] "FCGR3B" "S100A9" "CSF3R" "S100A8" "FCGR3A"
Or check the distribution of the markers directly: