MacBook Pro (Big Sur, 16-inch, 2019), Processor (2.4 GHz 8-Core Intel Core i9), Memory (64 GB 2667 MHz DDR4).
Attach necessary libraries:
library(ASURAT)
library(SingleCellExperiment)
library(SummarizedExperiment)
In this vignette, we create figures for the computational results of PBMC datasets.
Population ratios inferred by the existing methods and ASURAT are compared.
Load data.
<- readRDS("backup/04_006_pbmcs4k_annotation.rds")
pbmc_asurat <- readRDS("backup/04_011_pbmc4k_scran.rds")
pbmc_scran <- readRDS("backup/04_021_pbmc4k_seurat.rds")
pbmc_seurat <- readRDS("backup/04_022_pbmc4k_seurat_sccatch.rds")
pbmc_sccatch <- readRDS("backup/04_031_pbmc4k_monocle3.rds")
pbmc_monocle3 <- readRDS("backup/04_041_pbmc4k_sc3.rds")
pbmc_sc3
<- list(scran = pbmc_scran, Seurat = pbmc_seurat, scCATCH = pbmc_sccatch,
pbmcs Monocle3 = pbmc_monocle3, SC3 = pbmc_sc3, ASURAT = pbmc_asurat$CB)
The hypothetical results can be obtained from Cao et al., Front. Genet. (2020).
<- c("T", "Mono", "B", "NK/NKT", "Megakaryocyte", "DC", "Unspecified")
ct <- c(0.4226, 0.2707, 0.1509, 0.1546, 0.0012, 0, 0)
cr <- data.frame(cell_type = ct, ratio = cr, method = "SCSA") res
Prepare a data frame to be plotted.
<- c("B", "DC", "Megakaryocyte", "Mono", "NK/NKT", "T", "Unspecified")
cells for(i in seq_along(pbmcs)){
if(class(pbmcs[[i]]) == "SingleCellExperiment"){
<- as.data.frame(colData(pbmcs[[i]]))
df else if(class(pbmcs[[i]]) == "cell_data_set"){
}<- as.data.frame(colData(pbmcs[[i]]))
df else if(class(pbmcs[[i]]) == "Seurat"){
}<- pbmcs[[i]][[]]
df
}<- dplyr::group_by(df, cell_type)
df <- dplyr::summarise(df, ratio = dplyr::n())
df $ratio <- as.numeric(df$ratio) / sum(as.numeric(df$ratio))
df$method <- names(pbmcs)[i]
dffor(k in seq_along(cells)){
if(!(cells[k] %in% df$cell_type)){
<- data.frame(cell_type = cells[k], ratio = 0, method = names(pbmcs)[i])
tmp <- rbind(df, tmp)
df
}
}<- rbind(res, df)
res }
Change the names of cell types.
$cell_type[res$cell_type == "T"] <- "T cell"
res$cell_type[res$cell_type == "Mono"] <- "Monocyte"
res$cell_type[res$cell_type == "NK/NKT"] <- "NK or NKT cell"
res$cell_type[res$cell_type == "B"] <- "B cell"
res$cell_type[res$cell_type == "DC"] <- "Dendritic cell" res
Show the population ratios.
$method <- factor(res$method, levels = unique(res$method))
res<- length(unique(res$method))
n_methods <- c("black", scales::brewer_pal(palette = "Set1")(n_methods))
mycolors 7] <- mycolors[6] ; mycolors[6] <- "#FFFF33"
mycolors[
<- 0.55
ymax <- ggplot2::ggplot() +
p ::geom_bar(ggplot2::aes(x = res$cell_type, y = res$ratio,
ggplot2fill = as.factor(res$method)),
stat = "identity", width = 0.8,
position = ggplot2::position_dodge2(width = 1)) +
::theme_classic(base_size = 20, base_family = "Helvetica") +
ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(angle = 45)) +
ggplot2::ylim(0, ymax) +
ggplot2::labs(title = "PBMC 4k", x = "", y = "Population ratio", fill = "Method") +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 20),
ggplot2legend.position = "right") +
::scale_fill_manual(values = mycolors)
ggplot2<- "figures/figure_20_0010.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 11, height = 5) ggplot2
Load data.
<- readRDS("backup/05_011_pbmc6k_scran.rds")
pbmc_scran <- readRDS("backup/05_021_pbmc6k_seurat.rds")
pbmc_seurat <- readRDS("backup/05_022_pbmc6k_seurat_sccatch.rds")
pbmc_sccatch <- readRDS("backup/05_031_pbmc6k_monocle3.rds")
pbmc_monocle3 <- readRDS("backup/05_041_pbmc6k_sc3.rds")
pbmc_sc3 <- readRDS("backup/05_006_pbmcs6k_annotation.rds")
pbmc_asurat
<- list(scran = pbmc_scran, Seurat = pbmc_seurat, scCATCH = pbmc_sccatch,
pbmcs Monocle3 = pbmc_monocle3, SC3 = pbmc_sc3, ASURAT = pbmc_asurat$CB)
The hypothetical results can be obtained from Cao et al., Front. Genet. (2020).
<- c("T", "Mono", "B", "NK/NKT", "Megakaryocyte", "DC", "Unspecified")
ct <- c(0.4920, 0.2652, 0.1292, 0.1102, 0.0035, 0, 0)
cr <- data.frame(cell_type = ct, ratio = cr, method = "SCSA") res
Prepare a data frame to be plotted.
<- c("B", "DC", "Megakaryocyte", "Mono", "NK/NKT", "T", "Unspecified")
cells for(i in seq_along(pbmcs)){
if(class(pbmcs[[i]]) == "SingleCellExperiment"){
<- as.data.frame(colData(pbmcs[[i]]))
df else if(class(pbmcs[[i]]) == "cell_data_set"){
}<- as.data.frame(colData(pbmcs[[i]]))
df else if(class(pbmcs[[i]]) == "Seurat"){
}<- pbmcs[[i]][[]]
df
}<- dplyr::group_by(df, cell_type)
df <- dplyr::summarise(df, ratio = dplyr::n())
df $ratio <- as.numeric(df$ratio) / sum(as.numeric(df$ratio))
df$method <- names(pbmcs)[i]
dffor(k in seq_along(cells)){
if(!(cells[k] %in% df$cell_type)){
<- data.frame(cell_type = cells[k], ratio = 0, method = names(pbmcs)[i])
tmp <- rbind(df, tmp)
df
}
}<- rbind(res, df)
res }
Change the names of cell types.
$cell_type[res$cell_type == "T"] <- "T cell"
res$cell_type[res$cell_type == "Mono"] <- "Monocyte"
res$cell_type[res$cell_type == "NK/NKT"] <- "NK or NKT cell"
res$cell_type[res$cell_type == "B"] <- "B cell"
res$cell_type[res$cell_type == "DC"] <- "Dendritic cell" res
Show the population ratios.
$method <- factor(res$method, levels = unique(res$method))
res<- length(unique(res$method))
n_methods <- c("black", scales::brewer_pal(palette = "Set1")(n_methods))
mycolors 7] <- mycolors[6] ; mycolors[6] <- "#FFFF33"
mycolors[
<- 0.68
ymax <- ggplot2::ggplot() +
p ::geom_bar(ggplot2::aes(x = res$cell_type, y = res$ratio,
ggplot2fill = as.factor(res$method)),
stat = "identity", width = 0.8,
position = ggplot2::position_dodge2(width = 1)) +
::theme_classic(base_size = 20, base_family = "Helvetica") +
ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(angle = 45)) +
ggplot2::ylim(0, ymax) +
ggplot2::labs(title = "PBMC 6k", x = "", y = "Population ratio",
ggplot2fill = "Method") +
::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 20),
ggplot2legend.position = "right") +
::scale_fill_manual(values = mycolors)
ggplot2<- "figures/figure_20_0020.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 11, height = 5) ggplot2
Load data.
<- readRDS("backup/06_011_pbmc130k_scran.rds")
pbmc_scran <- readRDS("backup/06_021_pbmc130k_seurat.rds")
pbmc_seurat <- readRDS("backup/06_022_pbmc130k_seurat_sccatch.rds")
pbmc_sccatch <- readRDS("backup/06_031_pbmc130k_monocle3.rds")
pbmc_monocle3 #pbmc_sc3 <- readRDS("backup/06_041_pbmc130k_sc3.rds")
<- readRDS("backup/06_006_pbmcs130k_annotation.rds")
pbmc_asurat
<- list(scran = pbmc_scran, Seurat = pbmc_seurat, scCATCH = pbmc_sccatch,
pbmcs Monocle3 = pbmc_monocle3, ASURAT = pbmc_asurat$CB)
Load the sample information reported in previous paper (Reyes et al., 2020).
<- read.delim("rawdata/2020_001_Reyes/SCP548/metadata/scp_meta.txt")
info <- info[-1, ] ; rownames(info) <- info$NAME ; info <- info[, -1]
info which(info$Cell_Type == "NK"), ]$Cell_Type <- "NK/NKT"
info[<- dplyr::group_by(info, Cell_Type)
tmp <- dplyr::summarise(tmp, ratio = dplyr::n())
tmp $ratio <- as.numeric(tmp$ratio) / sum(as.numeric(tmp$ratio))
tmp<- data.frame(cell_type = tmp$Cell_Type, ratio = tmp$ratio, method = "Reyes_2020")
df <- rbind(df, c("Unspecified", 0, "Reyes_2020")) res
Prepare a data frame to be plotted.
<- c("B", "DC", "Megakaryocyte", "Mono", "NK/NKT", "T", "Unspecified")
cells for(i in seq_along(pbmcs)){
if(class(pbmcs[[i]]) == "SingleCellExperiment"){
<- as.data.frame(colData(pbmcs[[i]]))
df else if(class(pbmcs[[i]]) == "cell_data_set"){
}<- as.data.frame(colData(pbmcs[[i]]))
df else if(class(pbmcs[[i]]) == "Seurat"){
}<- pbmcs[[i]][[]]
df
}<- dplyr::group_by(df, cell_type)
df <- dplyr::summarise(df, ratio = dplyr::n())
df $ratio <- as.numeric(df$ratio) / sum(as.numeric(df$ratio))
df$method <- names(pbmcs)[i]
dffor(k in seq_along(cells)){
if(!(cells[k] %in% df$cell_type)){
<- data.frame(cell_type = cells[k], ratio = 0, method = names(pbmcs)[i])
tmp <- rbind(df, tmp)
df
}
}<- rbind(res, df)
res
}$ratio <- as.numeric(res$ratio) res
Change the names of cell types.
$cell_type[res$cell_type == "T"] <- "T cell"
res$cell_type[res$cell_type == "Mono"] <- "Monocyte"
res$cell_type[res$cell_type == "NK/NKT"] <- "NK or NKT cell"
res$cell_type[res$cell_type == "B"] <- "B cell"
res$cell_type[res$cell_type == "DC"] <- "Dendritic cell" res
Show the population ratios.
$method <- factor(res$method, levels = unique(res$method))
res<- length(unique(res$method))
n_methods <- c("black", scales::brewer_pal(palette = "Set1")(n_methods + 1))
mycolors 7] <- mycolors[6]
mycolors[
<- 0.65
ymax <- ggplot2::ggplot() +
p ::geom_bar(ggplot2::aes(x = res$cell_type, y = res$ratio,
ggplot2fill = as.factor(res$method)),
stat = "identity", width = 0.8,
position = ggplot2::position_dodge2(width = 1)) +
::theme_classic(base_size = 20, base_family = "Helvetica") +
ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(angle = 45)) +
ggplot2::ylim(0, ymax) +
ggplot2::labs(title = "Reyes 2020", x = "", y = "Population ratio",
ggplot2fill = "Method") +
::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 20),
ggplot2legend.position = "right") +
::scale_fill_manual(values = mycolors)
ggplot2<- "figures/figure_20_0030.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 11, height = 5) ggplot2
ASURAT includes multiple parameters for creating signs, such as
min_ngenes
and max_ngenes
in
remove_signs()
th_posi
and th_nega
in
cluster_genesets()
min_cnt_strg
and min_cnt_vari
in
create_signs()
threshold
and keep_rareID
in
remove_signs_redundant()
(only for ontology databases)keywords
in remove_signs_manually()
However, the most crucial parameters for sample (cell) clustering are
th_posi
and th_nega
, as well as
min_cnt_strg
and min_cnt_vari
in some
cases.
Here, an exhaustive parameter searching is demonstrated for PBMC 4k dataset. This is time-consuming but helpful for obtaining interpretable results.
load data.
<- readRDS("backup/04_003_pbmc4k_normalized.rds")
pbmc <- readRDS("backup/04_003_pbmc4k_cormat.rds") cormat
Load databases.
<- "https://github.com/keita-iida/ASURATDB/blob/main/genes2bioterm/"
urlpath load(url(paste0(urlpath, "20201213_human_CO.rda?raw=TRUE"))) # CO
load(url(paste0(urlpath, "20220308_human_MSigDB.rda?raw=TRUE"))) # MSigDB
load(url(paste0(urlpath, "20220308_human_CellMarker.rda?raw=TRUE"))) # CellMarker
Create a custom-built cell type-related databases by combining different databases for analyzing human single-cell transcriptome data.
<- list(human_CO[["cell"]], human_MSigDB[["cell"]], human_CellMarker[["cell"]])
d <- list(cell = do.call("rbind", d)) human_CB
Add formatted databases to metadata(sce)$sign
.
<- list(CB = pbmc)
pbmcs metadata(pbmcs$CB) <- list(sign = human_CB[["cell"]])
Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = pbmcs$CB, min_ngenes = 2, max_ngenes = 1000) pbmcs
Perform an exhausting parameter search.
<- "figures_pbmc4k_cell"
dirname dir.create(dirname)
<- seq(0.20, 0.40, length = 3)
th_posi <- -seq(0.20, 0.40, length = 3)
th_nega
<- 1
cnt for(i in seq_along(th_posi)){
for(j in seq_along(th_nega)){
# Create signs and sign-by-sample matrices.
set.seed(1)
<- cluster_genesets(sce = pbmcs$CB, cormat = cormat,
sce th_posi = th_posi[i], th_nega = th_nega[j])
<- create_signs(sce = sce, min_cnt_strg = 4, min_cnt_vari = 4)
sce # simmat <- human_GO$similarity_matrix$BP
# sce <- remove_signs_redundant(sce = sce, similarity_matrix = simmat,
# threshold = 0.85, keep_rareID = TRUE)
# keywords <- "Covid|COVID"
# sce <- remove_signs_manually(sce = sce, keywords = keywords)
<- makeSignMatrix(sce = sce, weight_strg = 0.5, weight_vari = 0.5)
sce # Cluster cells.
<- Seurat::as.Seurat(sce, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(30))
surt <- Seurat::FindClusters(surt, resolution = 0.11)
surt <- Seurat::as.SingleCellExperiment(surt)
temp <- colData(temp)$seurat_clusters
labels # Reduce dimension.
set.seed(1)
<- t(as.matrix(assay(sce, "counts")))
mat <- umap::umap(mat, n_components = 2)
res <- as.data.frame(res[["layout"]])
df = paste("alpha = ", format(th_posi[i], nsmall = 2), sep = "")
t1 = paste("beta = ", format(th_nega[j], nsmall = 2), sep = "")
t2 <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 0.5) +
::labs(title = "PBMC 4k", x = "UMAP_1", y = "UMAP_2") +
ggplot2# ggplot2::annotate("text", x = -Inf, y = Inf, label = t1, hjust = -0.16,
# vjust = 1.5, size = 4, colour = "red") +
# ggplot2::annotate("text", x = -Inf, y = Inf, label = t2, hjust = -0.17,
# vjust = 3.2, size = 4, colour = "red") +
::theme_classic(base_size = 18) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- paste0(dirname, "/", sprintf("figure_04_%04d.png", cnt))
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 4.8, height = 4)
ggplot2<- cnt + 1
cnt
} }
To investigate the dependency of ASURAT on the number of cells, benchmark ASURAT performance by randomly downsampling cells.
load data.
<- readRDS("backup/04_003_pbmc4k_normalized.rds") pbmc
Load databases.
<- "https://github.com/keita-iida/ASURATDB/blob/main/genes2bioterm/"
urlpath load(url(paste0(urlpath, "20201213_human_CO.rda?raw=TRUE"))) # CO
load(url(paste0(urlpath, "20220308_human_MSigDB.rda?raw=TRUE"))) # MSigDB
load(url(paste0(urlpath, "20220308_human_CellMarker.rda?raw=TRUE"))) # CellMarker
Create a custom-built cell type-related databases by combining different databases for analyzing human single-cell transcriptome data.
<- list(human_CO[["cell"]], human_MSigDB[["cell"]], human_CellMarker[["cell"]])
d <- list(cell = do.call("rbind", d)) human_CB
Perform random sampling of 4000 cells.
set.seed(1)
<- sample(dim(pbmc)[2], 4000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(40))
surt <- Seurat::FindClusters(surt, resolution = 0.15)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in low-dimensional spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 4000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0100.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # TRAC (p_val_adj ~0), EEF1A1 (p_val_adj ~0)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.99)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.98)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/20_001_pbmc4000_asurat.rds")
# Load data.
<- readRDS("backup/20_001_pbmc4000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(40))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.05)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 4000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0105.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~0), EEF1A1 (p_val_adj ~0)
# 1: Monocyte # S100A8 (p_val_adj ~0), S100A9 (p_val_adj ~0)
# 2: NK/NKT # NKG7 (p_val_adj ~0), GZMA (p_val_adj ~0)
# 3: B cell # CD79A (p_val_adj ~0), CD79B (p_val_adj ~0)
# 4: Dendritic cell # LILRA4 (p_val_adj ~e-271), LILRB4 (p_val_adj ~e-116)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/20_001_pbmc4000_seurat.rds")
# Load data.
<- readRDS("backup/20_001_pbmc4000_seurat.rds") surt
Perform random sampling of 3000 cells.
set.seed(1)
<- sample(dim(pbmc)[2], 3000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(40))
surt <- Seurat::FindClusters(surt, resolution = 0.10)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 3000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0110.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # EEF1A1 (p_val_adj ~e-259), TRAC (p_val_adj ~e-243)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.99)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.98)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/20_002_pbmc3000_asurat.rds")
# Load data.
<- readRDS("backup/20_002_pbmc3000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(40))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.05)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 3000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0115.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~e-270), EEF1A1 (p_val_adj ~e-239)
# 1: Monocyte # S100A8 (p_val_adj ~0), S100A9 (p_val_adj ~0)
# 2: NK/NKT # NKG7 (p_val_adj ~0), GZMA (p_val_adj ~0)
# 3: B cell # CD79A (p_val_adj ~0), CD79B (p_val_adj ~0)
# 4: Dendritic cell # LILRA4 (p_val_adj ~e-204), LILRB4 (p_val_adj ~e-102)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/20_002_pbmc3000_seurat.rds")
# Load data.
<- readRDS("backup/20_002_pbmc3000_seurat.rds") surt
Perform random sampling of 2000 cells.
set.seed(1)
<- sample(dim(pbmc)[2], 2000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(10))
surt <- Seurat::FindClusters(surt, resolution = 0.13)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 2000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0120.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # EEF1A1 (p_val_adj ~e-178), TRAC (p_val_adj ~e-156)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.97)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.98)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/20_003_pbmc2000_asurat.rds")
# Load data.
<- readRDS("backup/20_003_pbmc2000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(10))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.06)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 2000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0125.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~e-177), EEF1A1 (p_val_adj ~e-158)
# 1: Monocyte # FCN1 (p_val_adj ~0), MNDA (p_val_adj ~0)
# 2: NK/NKT # GZMA (p_val_adj ~e-238), NKG7 (p_val_adj ~e-220)
# 3: B cell # CD79A (p_val_adj ~e-302), MS4A1 (p_val_adj ~e-300)
# 4: Dendritic cell? # LILRA4 (p_val_adj ~e-129)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/20_003_pbmc2000_seurat.rds")
# Load data.
<- readRDS("backup/20_003_pbmc2000_seurat.rds") surt
Perform random sampling of 1500 cells.
set.seed(1)
<- sample(dim(pbmc)[2], 1500, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(50))
surt <- Seurat::FindClusters(surt, resolution = 0.40)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 1500 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0130.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # EEF1A1 (p_val_adj ~e-140), TRAC (p_val_adj ~e-121)
# 1: Monocyte # MSigDBID:20-S (...KUPFFER_CELLS_5) (sepI ~0.96)
# 2: NK/NKT cell # MSigDBID:1-V (...NK_NKT_CELLS_5) (sepI ~0.97)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.95)
# 4: Monocyte # MSigDBID:65-S (...MACROPHAGES) (sepI ~0.96)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "Mono"
# Save data.
saveRDS(sce, file = "backup/20_004_pbmc1500_asurat.rds")
# Load data.
<- readRDS("backup/20_004_pbmc1500_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(50))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.10)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 1500 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0135.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~e-138), EEF1A1 (p_val_adj ~e-121)
# 1: Monocyte # MNDA (p_val_adj ~e-228), FCN1 (p_val_adj ~e-217)
# 2: NK/NKT # GZMA (p_val_adj ~e-173), NKG7 (p_val_adj ~e-164)
# 3: B cell # CD79A (p_val_adj ~e-231), MS4A1 (p_val_adj ~e-226)
# 4: Dendritic cell? # FCER1A (p_val_adj ~e-176)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/20_004_pbmc1500_seurat.rds")
# Load data.
<- readRDS("backup/20_004_pbmc1500_seurat.rds") surt
Perform random sampling of 1000 cells.
set.seed(1)
<- sample(dim(pbmc)[2], 1000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(50))
surt <- Seurat::FindClusters(surt, resolution = 0.50)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 1000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0140.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: Unspecified # No significant genes are detected.
# 1: Monocyte # MSigDBID:15-S (...KUPFFER_CELLS_4) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.93)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.95)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "Unspecified"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
# Save data.
saveRDS(sce, file = "backup/20_005_pbmc1000_asurat.rds")
# Load data.
<- readRDS("backup/20_005_pbmc1000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(50))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.12)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 1000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0145.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: Unspecified # No significant genes are detected.
# 1: Monocyte # MNDA (p_val_adj ~e-167), FCN1 (p_val_adj ~e-161)
# 2: NK/NKT # GZMA (p_val_adj ~e-113), NKG7 (p_val_adj ~e-111)
# 3: B cell # CD79A (p_val_adj ~e-144), MS4A1 (p_val_adj ~e-132)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "Unspecified"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt# Save data.
saveRDS(surt, file = "backup/20_005_pbmc1000_seurat.rds")
# Load data.
<- readRDS("backup/20_005_pbmc1000_seurat.rds") surt
load data.
<- readRDS("backup/04_003_pbmc4k_normalized.rds") pbmc
Load databases.
<- "https://github.com/keita-iida/ASURATDB/blob/main/genes2bioterm/"
urlpath load(url(paste0(urlpath, "20201213_human_CO.rda?raw=TRUE"))) # CO
load(url(paste0(urlpath, "20220308_human_MSigDB.rda?raw=TRUE"))) # MSigDB
load(url(paste0(urlpath, "20220308_human_CellMarker.rda?raw=TRUE"))) # CellMarker
Create a custom-built cell type-related databases by combining different databases for analyzing human single-cell transcriptome data.
<- list(human_CO[["cell"]], human_MSigDB[["cell"]], human_CellMarker[["cell"]])
d <- list(cell = do.call("rbind", d)) human_CB
Perform random sampling of 4000 cells.
set.seed(2)
<- sample(dim(pbmc)[2], 4000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(40))
surt <- Seurat::FindClusters(surt, resolution = 0.15)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 4000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0100.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # TRAC (p_val_adj ~0), EEF1A1 (p_val_adj ~0)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.99)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.98)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/21_001_pbmc4000_asurat.rds")
# Load data.
<- readRDS("backup/21_001_pbmc4000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(40))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.05)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 4000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0105.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~0), EEF1A1 (p_val_adj ~0)
# 1: Monocyte # S100A8 (p_val_adj ~0), S100A9 (p_val_adj ~0)
# 2: NK/NKT # NKG7 (p_val_adj ~0), GZMA (p_val_adj ~0)
# 3: B cell # CD79A (p_val_adj ~0), CD79B (p_val_adj ~0)
# 4: Dendritic cell # LILRA4 (p_val_adj ~e-266), LILRB4 (p_val_adj ~e-113)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/21_001_pbmc4000_seurat.rds")
# Load data.
<- readRDS("backup/21_001_pbmc4000_seurat.rds") surt
Perform random sampling of 3000 cells.
set.seed(2)
<- sample(dim(pbmc)[2], 3000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(40))
surt <- Seurat::FindClusters(surt, resolution = 0.12)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 3000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0110.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # EEF1A1 (p_val_adj ~e-266), TRAC (p_val_adj ~e-225)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.98)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.97)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/21_002_pbmc3000_asurat.rds")
# Load data.
<- readRDS("backup/21_002_pbmc3000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(50))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.06)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 3000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0115.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: NK/NKT # GZMA (p_val_adj ~e-268), NKG7 (p_val_adj ~e-162)
# 1: T cell # EEF1A1 (p_val_adj ~e-164), TRAC (p_val_adj ~e-126)
# 2: Monocyte # S100A8 (p_val_adj ~0), S100A9 (p_val_adj ~0)
# 3: B cell # CD79A (p_val_adj ~0), CD79B (p_val_adj ~0)
# 4: Dendritic cell # LILRA4 (p_val_adj ~e-178)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "NK/NKT"
surt$cell_type[surt$cell_type == 1] <- "T"
surt$cell_type[surt$cell_type == 2] <- "Mono"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/21_002_pbmc3000_seurat.rds")
# Load data.
<- readRDS("backup/21_002_pbmc3000_seurat.rds") surt
Perform random sampling of 2000 cells.
set.seed(2)
<- sample(dim(pbmc)[2], 2000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(10))
surt <- Seurat::FindClusters(surt, resolution = 0.18)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 2000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0120.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # EEF1A1 (p_val_adj ~e-169), TRAC (p_val_adj ~e-149)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.97)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.97)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/21_003_pbmc2000_asurat.rds")
# Load data.
<- readRDS("backup/21_003_pbmc2000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(10))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.06)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 2000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0125.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~e-164), EEF1A1 (p_val_adj ~e-152)
# 1: Monocyte # FCN1 (p_val_adj ~0), MNDA (p_val_adj ~0)
# 2: NK/NKT # GZMA (p_val_adj ~e-241), NKG7 (p_val_adj ~e-225)
# 3: B cell # CD79A (p_val_adj ~e-304), MS4A1 (p_val_adj ~e-303)
# 4: Dendritic cell? # LILRA4 (p_val_adj ~e-146)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/21_003_pbmc2000_seurat.rds")
# Load data.
<- readRDS("backup/21_003_pbmc2000_seurat.rds") surt
Perform random sampling of 1500 cells.
set.seed(2)
<- sample(dim(pbmc)[2], 1500, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(20))
surt <- Seurat::FindClusters(surt, resolution = 0.30)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 1500 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0130.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # EEF1A1 (p_val_adj ~e-133), TRAC (p_val_adj ~e-103)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.94)
# 3: B cell # CellMarkerID:16-V (B cell...) (sepI ~0.95)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.97)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/21_004_pbmc1500_asurat.rds")
# Load data.
<- readRDS("backup/21_004_pbmc1500_asurat.rds") sce
#### Seurat Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(20))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.05)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 1500 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0135.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~e-121), EEF1A1 (p_val_adj ~e-112)
# 1: Monocyte # MNDA (p_val_adj ~e-255), FCN1 (p_val_adj ~e-232)
# 2: NK/NKT # GZMA (p_val_adj ~e-189), NKG7 (p_val_adj ~e-175)
# 3: B cell # CD79A (p_val_adj ~e-229), MS4A1 (p_val_adj ~e-222)
# 4: Dendritic cell? # LILRA4 (p_val_adj ~e-117)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/21_004_pbmc1500_seurat.rds")
# Load data.
<- readRDS("backup/21_004_pbmc1500_seurat.rds") surt
Perform random sampling of 1000 cells.
set.seed(2)
<- sample(dim(pbmc)[2], 1000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(30))
surt <- Seurat::FindClusters(surt, resolution = 0.30)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 1000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0140.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: Unspecified # No significant signs and genes are detected.
# 1: Monocyte # MSigDBID:28-S (...KUPFFER_CELLS_2) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.97)
# 3: B cell # CellMarkerID:16-V (...B_CELLS) (sepI ~0.94)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "Unspecified"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
# Save data.
saveRDS(sce, file = "backup/21_005_pbmc1000_asurat.rds")
# Load data.
<- readRDS("backup/21_005_pbmc1000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(20))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.15)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 1000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0145.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: Unspecified # No significant genes are detected.
# 1: Monocyte # MNDA (p_val_adj ~e-152), FCN1 (p_val_adj ~e-143)
# 2: NK/NKT # GZMA (p_val_adj ~e-127), NKG7 (p_val_adj ~e-121)
# 3: B cell # MS4A1 (p_val_adj ~e-155), CD79A (p_val_adj ~e-154)
# 4: Dendritic cell # FCER1A (p_val_adj ~e-102)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "Unspecified"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/21_005_pbmc1000_seurat.rds")
# Load data.
<- readRDS("backup/21_005_pbmc1000_seurat.rds") surt
load data.
<- readRDS("backup/04_003_pbmc4k_normalized.rds") pbmc
Load databases.
<- "https://github.com/keita-iida/ASURATDB/blob/main/genes2bioterm/"
urlpath load(url(paste0(urlpath, "20201213_human_CO.rda?raw=TRUE"))) # CO
load(url(paste0(urlpath, "20220308_human_MSigDB.rda?raw=TRUE"))) # MSigDB
load(url(paste0(urlpath, "20220308_human_CellMarker.rda?raw=TRUE"))) # CellMarker
Create a custom-built cell type-related databases by combining different databases for analyzing human single-cell transcriptome data.
<- list(human_CO[["cell"]], human_MSigDB[["cell"]], human_CellMarker[["cell"]])
d <- list(cell = do.call("rbind", d)) human_CB
Perform random sampling of 4000 cells.
set.seed(3)
<- sample(dim(pbmc)[2], 4000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(40))
surt <- Seurat::FindClusters(surt, resolution = 0.15)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 4000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_22_0100.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # TRAC (p_val_adj ~0), EEF1A1 (p_val_adj ~0)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.99)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.98)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/22_001_pbmc4000_asurat.rds")
# Load data.
<- readRDS("backup/22_001_pbmc4000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(40))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.05)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 4000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_22_0105.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~0), EEF1A1 (p_val_adj ~0)
# 1: Monocyte # S100A8 (p_val_adj ~0), S100A9 (p_val_adj ~0)
# 2: NK/NKT # NKG7 (p_val_adj ~0), GZMA (p_val_adj ~0)
# 3: B cell # CD79A (p_val_adj ~0), CD79B (p_val_adj ~0)
# 4: Dendritic cell # LILRA4 (p_val_adj ~e-273), LILRB4 (p_val_adj ~e-116)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/22_001_pbmc4000_seurat.rds")
# Load data.
<- readRDS("backup/22_001_pbmc4000_seurat.rds") surt
Perform random sampling of 3000 cells.
set.seed(3)
<- sample(dim(pbmc)[2], 3000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(40))
surt <- Seurat::FindClusters(surt, resolution = 0.12)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 3000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_22_0110.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # EEF1A1 (p_val_adj ~e-260), TRAC (p_val_adj ~e-236)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.99)
# 3: B cell # CellMarkerID:16-S (B cell...) (sepI ~0.96)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.98)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/22_002_pbmc3000_asurat.rds")
# Load data.
<- readRDS("backup/22_002_pbmc3000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(50))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.06)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 3000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_22_0115.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~e-260), EEF1A1 (p_val_adj ~e-242)
# 1: Monocyte # S100A8 (p_val_adj ~0), S100A9 (p_val_adj ~0)
# 2: NK/NKT # GZMA (p_val_adj ~0), NKG7 (p_val_adj ~0)
# 3: B cell # CD79A (p_val_adj ~0), CD79B (p_val_adj ~0)
# 4: Dendritic cell # LILRA4 (p_val_adj ~e-198)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/22_002_pbmc3000_seurat.rds")
# Load data.
<- readRDS("backup/22_002_pbmc3000_seurat.rds") surt
Perform random sampling of 2000 cells.
set.seed(3)
<- sample(dim(pbmc)[2], 2000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(50))
surt <- Seurat::FindClusters(surt, resolution = 0.25)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 2000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_22_0120.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # EEF1A1 (p_val_adj ~e-169), TRAC (p_val_adj ~e-149)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.98)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.95)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.98)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/22_003_pbmc2000_asurat.rds")
# Load data.
<- readRDS("backup/22_003_pbmc2000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(50))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.10)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 2000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_22_0125.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # LEF1 (p_val_adj ~e-118), EEF1A1 (p_val_adj ~e-112)
# 1: NK/NKT # GZMA (p_val_adj ~e-169), NKG7 (p_val_adj ~e-108)
# 2: Monocyte # MNDA (p_val_adj ~0), FCN1 (p_val_adj ~e-301)
# 3: B cell # CD79A (p_val_adj ~0), MS4A1 (p_val_adj ~0)
# 4: Dendritic cell? # LILRA4 (p_val_adj ~e-120)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "NK/NKT"
surt$cell_type[surt$cell_type == 2] <- "Mono"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/22_003_pbmc2000_seurat.rds")
# Load data.
<- readRDS("backup/22_003_pbmc2000_seurat.rds") surt
Perform random sampling of 1500 cells.
set.seed(3)
<- sample(dim(pbmc)[2], 1500, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(20))
surt <- Seurat::FindClusters(surt, resolution = 0.30)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 1500 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_22_0130.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # EEF1A1 (p_val_adj ~e-136), TRAC (p_val_adj ~e-119)
# 1: Monocyte # MSigDBID:15-S (...KUPFFER_CELLS_4) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.98)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
# Save data.
saveRDS(sce, file = "backup/22_004_pbmc1500_asurat.rds")
# Load data.
<- readRDS("backup/22_004_pbmc1500_asurat.rds") sce
#### Seurat Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(20))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.15)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 1500 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_22_0135.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~e-133), EEF1A1 (p_val_adj ~e-125)
# 1: Monocyte # MNDA (p_val_adj ~e-222), FCN1 (p_val_adj ~e-209)
# 2: NK/NKT # GZMA (p_val_adj ~e-178), NKG7 (p_val_adj ~e-168)
# 3: B cell # CD79A (p_val_adj ~e-231), MS4A1 (p_val_adj ~e-222)
# 4: Dendritic cell? # LILRA4 (p_val_adj ~e-150)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/22_004_pbmc1500_seurat.rds")
# Load data.
<- readRDS("backup/22_004_pbmc1500_seurat.rds") surt
Perform random sampling of 1000 cells.
set.seed(3)
<- sample(dim(pbmc)[2], 1000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(30))
surt <- Seurat::FindClusters(surt, resolution = 0.30)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 1000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_22_0140.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # CellMarkerID:182-V (Naive CD8+ T cell) (sepI ~0.91)
# 1: Monocyte # MSigDBID:15-S (...KUPFFER_CELLS_4) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.97)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
# Save data.
saveRDS(sce, file = "backup/22_005_pbmc1000_asurat.rds")
# Load data.
<- readRDS("backup/22_005_pbmc1000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(50))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.20)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 1000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_22_0145.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: Unspecified # No significant genes are detected.
# 1: Monocyte # MNDA (p_val_adj ~e-164), FCN1 (p_val_adj ~e-141)
# 2: NK/NKT # GZMA (p_val_adj ~e-127), NKG7 (p_val_adj ~e-121)
# 3: B cell # CD79A (p_val_adj ~e-152), MS4A1 (p_val_adj ~e-148)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "Unspecified"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt# Save data.
saveRDS(surt, file = "backup/22_005_pbmc1000_seurat.rds")
# Load data.
<- readRDS("backup/22_005_pbmc1000_seurat.rds") surt
Investigate population ratios, assuming that the existing cells are T cell, B cell, NK/NKT cell, monocyte, dendritic cell, and unspecified one.
load data.
<- c(4000, 3000, 2000, 1500, 1000)
ncells
<- list()
asurats for(i in seq_len(5)){
as.character(ncells[i])]] <- list()
asurats[[for(j in seq_len(3)){
<- sprintf("backup/2%d_%03d_pbmc%04d_asurat.rds", j - 1, i, ncells[i])
fname as.character(ncells[i])]][[paste0("try_", j)]] <- readRDS(fname)
asurats[[
}
}
<- list()
seurats for(i in seq_len(5)){
as.character(ncells[i])]] <- list()
seurats[[for(j in seq_len(3)){
<- sprintf("backup/2%d_%03d_pbmc%04d_seurat.rds", j - 1, i, ncells[i])
fname as.character(ncells[i])]][[paste0("try_", j)]] <- readRDS(fname)
seurats[[
} }
Below are the results of ASURAT.
<- c()
res <- c("B", "DC", "Mono", "NK/NKT", "T", "Unspecified")
cells for(i in seq_along(asurats)){
for(j in seq_along(asurats[[i]])){
<- as.data.frame(colData(asurats[[i]][[j]]$CB))
df <- paste0(nrow(df), " cells")
ncell <- dplyr::group_by(df, cell_type)
df <- dplyr::summarise(df, n = dplyr::n()) ; df$ratio <- df$n / sum(df$n)
df $ncells <- ncell
df$sample <- names(asurats[[i]])[j]
dffor(k in seq_along(cells)){
if(!(cells[k] %in% df$cell_type)){
<- data.frame(cell_type = cells[k], n = 0, ratio = 0,
tmp ncells = paste0(names(asurats)[i], " cells"),
sample = names(asurats[[i]])[j])
<- rbind(df, tmp)
df
}
}<- rbind(res, df)
res
}
}
for(i in seq_along(cells)){
<- paste0("PBMC: ", cells[i], " (ASURAT)")
title <- res[which(res$cell_type == cells[i]), ]
res_cell <- ggplot2::ggplot(res_cell) +
p ::geom_boxplot(ggplot2::aes(x = ncells, y = ratio),
ggplot2fill = "grey90", lwd = 1) +
::geom_jitter(ggplot2::aes(x = ncells, y = ratio), size = 2.5, stroke = 2,
ggplot2shape = 21, alpha = 1, color = "black", fill = "white",
position = ggplot2::position_jitter(w = 0.2, h = 0)) +
::scale_x_discrete(limit = unique(res_cell$ncells),
ggplot2guide = ggplot2::guide_axis(angle = 45)) +
::labs(title = title, x = "Down sampling", y = "Population ratio") +
ggplot2::theme_classic(base_size = 25) +# ggplot2::ylim(0.375, 0.425) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 25),
ggplot2legend.position = "none")
<- sprintf("figures/figure_23_%04d.png", 9 + i)
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 7, height = 6.5)
ggplot2 }
Below are the results of Seurat.
<- c()
res <- c("B", "DC", "Mono", "NK/NKT", "T", "Unspecified")
cells for(i in seq_along(seurats)){
for(j in seq_along(seurats[[i]])){
<- seurats[[i]][[j]][[]]
df <- paste0(nrow(df), " cells")
ncell <- dplyr::group_by(df, cell_type)
df <- dplyr::summarise(df, n = dplyr::n()) ; df$ratio <- df$n / sum(df$n)
df $ncells <- ncell
df$sample <- names(seurats[[i]])[j]
dffor(k in seq_along(cells)){
if(!(cells[k] %in% df$cell_type)){
<- data.frame(cell_type = cells[k], n = 0, ratio = 0,
tmp ncells = paste0(names(seurats)[i], " cells"),
sample = names(seurats[[i]])[j])
<- rbind(df, tmp)
df
}
}<- rbind(res, df)
res
}
}
for(i in seq_along(cells)){
<- paste0("PBMC: ", cells[i], " (Seurat)")
title <- res[which(res$cell_type == cells[i]), ]
res_cell <- ggplot2::ggplot(res_cell) +
p ::geom_boxplot(ggplot2::aes(x = ncells, y = ratio),
ggplot2fill = "grey90", lwd = 1) +
::geom_jitter(ggplot2::aes(x = ncells, y = ratio), size = 2.5, stroke = 2,
ggplot2shape = 21, alpha = 1, color = "black", fill = "white",
position = ggplot2::position_jitter(w = 0.2, h = 0)) +
::scale_x_discrete(limit = unique(res_cell$ncells),
ggplot2guide = ggplot2::guide_axis(angle = 45)) +
::labs(title = title, x = "Down sampling", y = "Population ratio") +
ggplot2::theme_classic(base_size = 25) +# ggplot2::ylim(0.375, 0.425) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 25),
ggplot2legend.position = "none")
<- sprintf("figures/figure_24_%04d.png", 9 + i)
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 7, height = 6.5)
ggplot2 }
Apply ASURAT on the combination of CO and CellMarker, GO and KEGG, and MSigDB at the same time and show the performance instead of apply it separately.
Load the normalized data (see here) and correlation matrix.
<- readRDS("backup/04_003_pbmc4k_normalized.rds")
pbmc <- readRDS("backup/04_003_pbmc4k_cormat.rds") cormat
Load the MSigDB.
<- "https://github.com/keita-iida/ASURATDB/blob/main/genes2bioterm/"
urlpath load(url(paste0(urlpath, "20220308_human_MSigDB.rda?raw=TRUE"))) # MSigDB
Perform ASURAT protocols.
# Create a sign-by-sample matrix.
<- list(CB = pbmc)
pbmcs metadata(pbmcs$CB) <- list(sign = human_MSigDB$cell)
$CB <- remove_signs(sce = pbmcs$CB, min_ngenes = 2, max_ngenes = 1000)
pbmcsset.seed(1)
$CB <- cluster_genesets(sce = pbmcs$CB, cormat = cormat,
pbmcsth_posi = 0.20, th_nega = -0.40)
$CB <- create_signs(sce = pbmcs$CB, min_cnt_strg = 4, min_cnt_vari = 4)
pbmcs$CB <- makeSignMatrix(sce = pbmcs$CB, weight_strg = 0.5, weight_vari = 0.5)
pbmcs# Perform dimension reduction (t-SNE).
set.seed(1)
<- t(as.matrix(assay(pbmcs$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(pbmcs$CB, "TSNE") <- res[["Y"]]
# Perform dimension reduction (UMAP).
set.seed(1)
<- t(as.matrix(assay(pbmcs$CB, "counts")))
mat <- umap::umap(mat, n_components = 2)
res reducedDim(pbmcs$CB, "UMAP") <- res[["layout"]]
# Perform a cell clustering.
<- Seurat::as.Seurat(pbmcs$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(pbmcs$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(40))
surt <- Seurat::FindClusters(surt, resolution = 0.15)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(pbmcs$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in low-dimensional spaces.
<- colData(pbmcs$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(pbmcs$CB, "UMAP"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 4k (ASURAT using MSigDB)",
ggplot2x = "UMAP_1", y = "UMAP_2", color = "") +
::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2# Compute separation indices for each cluster against the others.
set.seed(1)
<- colData(pbmcs$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = pbmcs$CB, labels = labels, nrand_samples = NULL)
pbmcs# Compute differentially expressed genes.
set.seed(1)
<- Seurat::as.Seurat(pbmcs$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(pbmcs$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(pbmcs$CB)$marker_genes$all <- res
Investigating significant signs, we inferred signaling pathway states as follows:
0: T # EEF1A1 (p_val_adj = 0), TRAC (p_val_adj = e-227)
1: Monocyte # MSigDBID:13-S (AIZARANI_LIVER_C23_KUPFFER_CELLS_3) (I = 0.998)
2: NK/NKT # MSigDBID:1-V (AIZARANI_LIVER_C1_NK_NKT_CELLS_1) (I = 0.937)
3: B cell # MSigDBID:23-V (AIZARANI_LIVER_C34_MHC_II_POS_B_CELLS) (I = 0.958)
4: Dendritic cell # LILRA4 (p_val_adj = e-248), LILRB (p_val_adj = e-106)
colData(pbmcs$CB)$path_state <- as.character(colData(pbmcs$CB)$seurat_clusters)
colData(pbmcs$CB)$path_state[colData(pbmcs$CB)$path_state == 0] <- "T cell"
colData(pbmcs$CB)$path_state[colData(pbmcs$CB)$path_state == 1] <- "Monocyte"
colData(pbmcs$CB)$path_state[colData(pbmcs$CB)$path_state == 2] <- "NK or NKT cell"
colData(pbmcs$CB)$path_state[colData(pbmcs$CB)$path_state == 3] <- "B cell"
colData(pbmcs$CB)$path_state[colData(pbmcs$CB)$path_state == 4] <- "Dendritic cell"
Show the annotation results in low-dimensional spaces.
<- "PBMC 4k (MSigDB)"
title <- factor(colData(pbmcs$CB)$path_state,
labels levels = c("T cell", "Monocyte", "NK or NKT cell", "B cell",
"Dendritic cell"))
<- as.data.frame(reducedDim(pbmcs$CB, "UMAP"))
df <- scales::brewer_pal(palette = "Set2")(5)
mycolor <- c("T cell" = mycolor[1], "Monocyte" = mycolor[2],
mycolor "NK or NKT cell" = mycolor[3], "B cell" = mycolor[4],
"Dendritic cell" = mycolor[5])
<- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "UMAP_1", y = "UMAP_2", color = "Cell state") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_color_manual(values = mycolor) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_30_0010.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 6.4, height = 4.3) ggplot2
# Save data.
saveRDS(pbmcs, file = "backup/30_001_pbmc4k_asurat_MSigDB.rds")
# Load data.
<- readRDS("backup/30_001_pbmc4k_asurat_MSigDB.rds") pbmcs
Load the normalized data (see here) and correlation matrix.
<- readRDS("backup/04_003_pbmc4k_normalized.rds")
pbmc <- readRDS("backup/04_003_pbmc4k_cormat.rds") cormat
Load the MSigDB.
<- "https://github.com/keita-iida/ASURATDB/blob/main/genes2bioterm/"
urlpath load(url(paste0(urlpath, "20201213_human_CO.rda?raw=TRUE"))) # CO
load(url(paste0(urlpath, "20220308_human_CellMarker.rda?raw=TRUE"))) # CellMarker
Perform ASURAT protocols.
# Create a sign-by-sample matrix.
<- list(human_CO[["cell"]], human_CellMarker[["cell"]])
d <- list(cell = do.call("rbind", d))
human_CB <- list(CB = pbmc)
pbmcs metadata(pbmcs$CB) <- list(sign = human_CB$cell)
$CB <- remove_signs(sce = pbmcs$CB, min_ngenes = 2, max_ngenes = 1000)
pbmcsset.seed(1)
$CB <- cluster_genesets(sce = pbmcs$CB, cormat = cormat,
pbmcsth_posi = 0.25, th_nega = -0.30)
$CB <- create_signs(sce = pbmcs$CB, min_cnt_strg = 3, min_cnt_vari = 3)
pbmcs$CB <- makeSignMatrix(sce = pbmcs$CB, weight_strg = 0.5, weight_vari = 0.5)
pbmcsset.seed(1)
<- t(as.matrix(assay(pbmcs$CB, "counts")))
mat <- umap::umap(mat, n_components = 2)
res reducedDim(pbmcs$CB, "UMAP") <- res[["layout"]]
# Perform a cell clustering.
<- Seurat::as.Seurat(pbmcs$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(pbmcs$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(15))
surt <- Seurat::FindClusters(surt, resolution = 0.20)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(pbmcs$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in low-dimensional spaces.
<- colData(pbmcs$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(pbmcs$CB, "UMAP"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 4k (ASURAT using CO & CellMarker)",
ggplot2x = "UMAP_1", y = "UMAP_2", color = "") +
::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2# Compute separation indices for each cluster against the others.
set.seed(1)
<- colData(pbmcs$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = pbmcs$CB, labels = labels, nrand_samples = NULL)
pbmcs# Compute differentially expressed genes.
set.seed(1)
<- Seurat::as.Seurat(pbmcs$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(pbmcs$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(pbmcs$CB)$marker_genes$all <- res
Investigating significant signs and differentially expressed genes, we inferred cell types as follows:
0: T # CellMarkerID:60-S (CD4+ T cell) (I = 0.759)
1: Monocyte # S100A8 (p_val_adj = 0), S100A9 (p_val_adj = 0)
2: NK/NKT # CellMarkerID:184-V (Natural killer cell) (I = 0.942)
3: B cell # CD79A (p_val_adj = 0), CD79B (p_val_adj = 0)
4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (I = 0.789)
# LILRA4 (p_val_adj = e-247)
colData(pbmcs$CB)$cell_type <- as.character(colData(pbmcs$CB)$seurat_clusters)
colData(pbmcs$CB)$cell_type[colData(pbmcs$CB)$cell_type == 0] <- "T cell"
colData(pbmcs$CB)$cell_type[colData(pbmcs$CB)$cell_type == 1] <- "Monocyte"
colData(pbmcs$CB)$cell_type[colData(pbmcs$CB)$cell_type == 2] <- "NK or NKT cell"
colData(pbmcs$CB)$cell_type[colData(pbmcs$CB)$cell_type == 3] <- "B cell"
colData(pbmcs$CB)$cell_type[colData(pbmcs$CB)$cell_type == 4] <- "Dendritic cell"
Show the annotation results in low-dimensional spaces.
<- "PBMC 4k (CO and CellMarker)"
title <- factor(colData(pbmcs$CB)$cell_type,
labels levels = c("T cell", "Monocyte", "NK or NKT cell", "B cell",
"Dendritic cell"))
<- as.data.frame(reducedDim(pbmcs$CB, "UMAP"))
df <- scales::hue_pal()(5)
mycolor <- c("T cell" = mycolor[1], "Monocyte" = mycolor[2],
mycolor "NK or NKT cell" = mycolor[3], "B cell" = mycolor[4],
"Dendritic cell" = mycolor[5])
<- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "UMAP_1", y = "UMAP_2", color = "Cell state") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_30_0020.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 6.4, height = 4.3) ggplot2
# Save data.
saveRDS(pbmcs, file = "backup/30_002_pbmc4k_asurat_COCellMarker.rds")
# Load data.
<- readRDS("backup/30_002_pbmc4k_asurat_COCellMarker.rds") pbmcs
Load the normalized data (see here) and correlation matrix.
<- readRDS("backup/04_003_pbmc4k_normalized.rds")
pbmc <- readRDS("backup/04_003_pbmc4k_cormat.rds") cormat
Load the MSigDB.
<- "https://github.com/keita-iida/ASURATDB/blob/main/genes2bioterm/"
urlpath load(url(paste0(urlpath, "20201213_human_GO_red.rda?raw=TRUE"))) # GO
load(url(paste0(urlpath, "20201213_human_KEGG.rda?raw=TRUE"))) # KEGG
Perform ASURAT protocols.
# Create a sign-by-sample matrix.
<- list(human_GO[["BP"]], human_KEGG[["pathway"]])
d <- list(cell = do.call("rbind", d))
human_CB <- list(CB = pbmc)
pbmcs metadata(pbmcs$CB) <- list(sign = human_CB$cell)
$CB <- remove_signs(sce = pbmcs$CB, min_ngenes = 2, max_ngenes = 1000)
pbmcsset.seed(1)
$CB <- cluster_genesets(sce = pbmcs$CB, cormat = cormat,
pbmcsth_posi = 0.20, th_nega = -0.25)
$CB <- create_signs(sce = pbmcs$CB, min_cnt_strg = 4, min_cnt_vari = 4)
pbmcs<- "Covid|COVID"
keywords $CB <- remove_signs_manually(sce = pbmcs$CB, keywords = keywords)
pbmcs$CB <- makeSignMatrix(sce = pbmcs$CB, weight_strg = 0.5, weight_vari = 0.5)
pbmcsset.seed(1)
<- t(as.matrix(assay(pbmcs$CB, "counts")))
mat <- umap::umap(mat, n_components = 2)
res reducedDim(pbmcs$CB, "UMAP") <- res[["layout"]]
# Perform a cell clustering.
<- Seurat::as.Seurat(pbmcs$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(pbmcs$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(50))
surt <- Seurat::FindClusters(surt, resolution = 0.15)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(pbmcs$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in low-dimensional spaces.
<- colData(pbmcs$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(pbmcs$CB, "UMAP"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 4k (ASURAT using GO & KEGG)",
ggplot2x = "UMAP_1", y = "UMAP_2", color = "") +
::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2# Compute separation indices for each cluster against the others.
set.seed(1)
<- colData(pbmcs$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = pbmcs$CB, labels = labels, nrand_samples = NULL)
pbmcs# Compute differentially expressed genes.
set.seed(1)
<- Seurat::as.Seurat(pbmcs$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(pbmcs$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(pbmcs$CB)$marker_genes$all <- res
Investigating significant signs, we inferred cellular functional states as follows:
0: T? # GO:0065003-S (protein-containing complex assembly) (I = 0.885)
# GO:0010629-S (negative regulation of gene expression) (I = 0.873)
1: Monocyte? # GO:0030595-S (leukocyte chemotaxis) (I = 0.998)
# GO:0001906-S (cell killing) (I = 0.998)
2: NK/NKT? # GO:0071347-V (cellular response to interleukin-1) (I = 0.962)
# GO:0045089-V (positive regulation of innate immune response)
# (I = 0.951)
3: B cell? # GO:0050853-V (B cell receptor signaling pathway) (I = 0.991)
# GO:0002455-S (humoral immune response mediated by circulating
# immunoglobulin) (I = 0.979)
4: Dendritic cell? # GO:0070972-V (protein localization to endoplasmic reticulum)
# (I = 0.977)
# GO:0002532-S (production of molecular mediator involved in
# inflammatory response)
# (I = 0.943)
colData(pbmcs$CB)$func_state <- as.character(colData(pbmcs$CB)$seurat_clusters)
colData(pbmcs$CB)$func_state[colData(pbmcs$CB)$func_state == 0] <- "Unspecified"
colData(pbmcs$CB)$func_state[colData(pbmcs$CB)$func_state == 1] <- "Cell killing"
colData(pbmcs$CB)$func_state[colData(pbmcs$CB)$func_state == 2] <- "Immune-1"
colData(pbmcs$CB)$func_state[colData(pbmcs$CB)$func_state == 3] <- "B cell"
colData(pbmcs$CB)$func_state[colData(pbmcs$CB)$func_state == 4] <- "Immune-2"
Show the annotation results in low-dimensional spaces.
<- "PBMC 4k (GO and KEGG)"
title <- factor(colData(pbmcs$CB)$func_state,
labels levels = c("Unspecified", "Cell killing", "Immune-1", "B cell",
"Immune-2"))
<- as.data.frame(reducedDim(pbmcs$CB, "UMAP"))
df <- scales::brewer_pal(palette = "Set1")(5)
mycolor <- c("Unspecified" = mycolor[1], "Cell killing" = mycolor[2],
mycolor "Immune-1" = mycolor[3], "B cell" = mycolor[4],
"Immune-2" = mycolor[5])
<- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "UMAP_1", y = "UMAP_2", color = "Cell state") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_color_manual(values = mycolor) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_30_0030.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 6.1, height = 4.3) ggplot2
# Save data.
saveRDS(pbmcs, file = "backup/30_003_pbmc4k_asurat_GOKEGG.rds")
# Load data.
<- readRDS("backup/30_003_pbmc4k_asurat_GOKEGG.rds") pbmcs
Using PBMC 4k data, cluster cells based on an SSM, which is created from CO, CellMarker, MSigDB, GO, and KEGG databases.
Load data.
<- readRDS("backup/04_004_pbmcs4k_ssm.rds") pbmcs
Combine the SSMs by row to create a SingleCellExperiment object.
reducedDims(pbmcs$CB) <- NULL ; reducedDims(pbmcs$GO) <- NULL
reducedDims(pbmcs$KG) <- NULL
metadata(pbmcs$CB) <- list() ; metadata(pbmcs$GO) <- list()
metadata(pbmcs$KG) <- list()
<- list(pbmcs$CB, pbmcs$GO, pbmcs$KG)
pbmc <- do.call("rbind", pbmc) pbmc
Perform Uniform Manifold Approximation and Projection.
set.seed(1)
<- t(as.matrix(assay(pbmc, "counts")))
mat <- umap::umap(mat, n_components = 2)
res reducedDim(pbmc, "UMAP") <- res[["layout"]]
Show the results of dimensional reduction in low-dimensional spaces.
<- as.data.frame(reducedDim(pbmc, "UMAP"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2]),
ggplot2color = "black", size = 1, alpha = 1) +
::labs(title = "PBMC 4k (combined)", x = "UMAP_1", y = "UMAP_2") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18))
ggplot2<- "figures/figure_30_0040.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 4.1, height = 4.3) ggplot2
Cluster cells using Seurat.
<- 0.15
resolutions <- seq_len(40)
dims <- Seurat::as.Seurat(pbmc, counts = "counts", data = "counts")
surt <- as.matrix(assay(pbmc, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = dims)
surt <- Seurat::FindClusters(surt, resolution = resolutions)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(pbmc)$seurat_clusters <- colData(temp)$seurat_clusters
Investigate significant signs.
set.seed(1)
<- colData(pbmc)$seurat_clusters
labels <- compute_sepI_all(sce = pbmc, labels = labels, nrand_samples = NULL) pbmc
Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(pbmc, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(pbmc), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(pbmc)$marker_genes$all <- res
Investigating significant signs and differentially expressed genes, we inferred cell types as follows:
0: T # TRAC (p_val_adj ~0), EEF1A1 (p_val_adj ~0)
1: Monocyte # MSigDBID:13-S (AIZARANI_LIVER_C23_KUPFFER_CELLS_3) (I = 0.999)
2: NK/NKT # MSigDBID:1-V (AIZARANI_LIVER_C1_NK_NKT_CELLS_1) (I = 0.978)
3: B cell # GO:0050853-V (B cell receptor signaling pathway) (I = 0.974)
4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (I = 0.982)
colData(pbmc)$cell_type <- as.character(colData(pbmc)$seurat_clusters)
colData(pbmc)$cell_type[colData(pbmc)$cell_type == 0] <- "T cell"
colData(pbmc)$cell_type[colData(pbmc)$cell_type == 1] <- "Monocyte"
colData(pbmc)$cell_type[colData(pbmc)$cell_type == 2] <- "NK or NKT cell"
colData(pbmc)$cell_type[colData(pbmc)$cell_type == 3] <- "B cell"
colData(pbmc)$cell_type[colData(pbmc)$cell_type == 4] <- "Dendritic cell"
Show the annotation results in low-dimensional spaces.
<- "PBMC 4k"
title <- factor(colData(pbmc)$cell_type,
labels levels = c("T cell", "Monocyte", "NK or NKT cell", "B cell",
"Dendritic cell"))
<- as.data.frame(reducedDim(pbmc, "UMAP"))
df <- scales::hue_pal()(5)
mycolor <- c("T cell" = mycolor[1], "Monocyte" = mycolor[2],
mycolor "NK or NKT cell" = mycolor[3], "B cell" = mycolor[4],
"Dendritic cell" = mycolor[5])
<- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "UMAP_1", y = "UMAP_2", color = "Cell state") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_color_manual(values = mycolor) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_30_0050.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 6.4, height = 4.3) ggplot2
Plot heatmaps for the selected signs.
# Significant signs
<- list()
marker_signs <- "foofoo|hogehoge"
keys = 1
i <- metadata(pbmc)$marker_signs$all
marker_signs[[i]] <- marker_signs[[i]][!grepl(keys, marker_signs[[i]]$Description), ]
marker_signs[[i]] <- dplyr::group_by(marker_signs[[i]], Ident_1)
marker_signs[[i]] #marker_signs[[i]] <- dplyr::slice_max(marker_signs[[i]], sepI, n = 3)
#marker_signs[[i]] <- dplyr::slice_min(marker_signs[[i]], Rank, n = 3)
# ssm_list
<- list() ; ssm_list <- list()
sces_sub = 1
i <- pbmc[rownames(pbmc) %in% marker_signs[[i]]$SignID, ]
sces_sub[[i]] <- assay(sces_sub[[i]], "counts")
ssm_list[[i]] names(ssm_list) <- c("sign_scores")
# ssmlabel_list
<- list() ; ssmlabel_list <- list()
labels = 1
i <- factor(colData(pbmcs[[i]])$cell_type,
tmp levels = c("T cell", "Monocyte", "NK or NKT cell", "B cell",
"Dendritic cell"))
<- data.frame(label = tmp)
labels[[i]] <- length(unique(tmp))
n_groups $color <- scales::hue_pal()(n_groups)[tmp]
labels[[i]]<- labels[[i]]
ssmlabel_list[[i]] names(ssmlabel_list) <- c("Label_cellstate")
# Plot heatmaps for the selected signs.
<- "figures/figure_30_0060.png"
filename #png(file = filename, height = 600, width = 540, res = 120)
png(file = filename, height = 260, width = 270, res = 60)
set.seed(1)
<- "PBMC 4k"
title plot_multiheatmaps(ssm_list = ssm_list, gem_list = NULL,
ssmlabel_list = ssmlabel_list, gemlabel_list = NULL,
nrand_samples = 500, show_row_names = FALSE, title = title)
dev.off()
# Save data.
saveRDS(pbmc, file = "backup/30_005_pbmc4k_combined.rds")
# Load data.
<- readRDS("backup/30_005_pbmc4k_combined.rds") pbmc
Using PBMC 4k data, cluster cells based on SSMs, which are separately created from CO and CellMarker, GO and KEGG, and MSigDB databases.
Load data.
<- readRDS("backup/30_001_pbmc4k_asurat_MSigDB.rds")
pbmcs_MS <- readRDS("backup/30_002_pbmc4k_asurat_COCellMarker.rds")
pbmcs_CM <- readRDS("backup/30_003_pbmc4k_asurat_GOKEGG.rds")
pbmcs_GK <- list(CM = pbmcs_CM[["CB"]], GK = pbmcs_GK[["CB"]], MS = pbmcs_MS[["CB"]]) pbmcs
Plot heatmaps for the selected signs.
# Significant signs
<- list()
marker_signs <- "foofoo|hogehoge"
keys for(i in seq_along(pbmcs)){
<- metadata(pbmcs[[i]])$marker_signs$all
marker_signs[[i]] <- marker_signs[[i]][!grepl(keys, marker_signs[[i]]$Description), ]
marker_signs[[i]] <- dplyr::group_by(marker_signs[[i]], Ident_1)
marker_signs[[i]] # marker_signs[[i]] <- dplyr::slice_max(marker_signs[[i]], sepI, n = 3)
# marker_signs[[i]] <- dplyr::slice_min(marker_signs[[i]], Rank, n = 3)
}# ssm_list
<- list() ; ssm_list <- list()
sces_sub for(i in seq_along(pbmcs)){
<- pbmcs[[i]][rownames(pbmcs[[i]]) %in% marker_signs[[i]]$SignID, ]
sces_sub[[i]] <- assay(sces_sub[[i]], "counts")
ssm_list[[i]]
}names(ssm_list) <- c("SSM_celltype", "SSM_function", "SSM_pathway")
# ssmlabel_list
<- list() ; ssmlabel_list <- list()
labels for(i in seq_along(pbmcs)){
if(i == 1){
<- factor(colData(pbmcs[[i]])$cell_type,
tmp levels = c("T cell", "Monocyte", "NK or NKT cell", "B cell",
"Dendritic cell"))
else if(i == 2){
}<- factor(colData(pbmcs[[i]])$func_state,
tmp levels = c("Unspecified", "Cell killing", "Immune-1", "B cell",
"Immune-2"))
else if(i == 3){
}<- factor(colData(pbmcs[[i]])$path_state,
tmp levels = c("T cell", "Monocyte", "NK or NKT cell", "B cell",
"Dendritic cell"))
}<- data.frame(label = tmp)
labels[[i]] <- length(unique(tmp))
n_groups if(i == 1){
$color <- scales::hue_pal()(n_groups)[tmp]
labels[[i]]else if(i == 2){
}$color <- scales::brewer_pal(palette = "Set1")(n_groups)[tmp]
labels[[i]]else if(i == 3){
}$color <- scales::brewer_pal(palette = "Set2")(n_groups)[tmp]
labels[[i]]
}<- labels[[i]]
ssmlabel_list[[i]]
}names(ssmlabel_list) <- c("Label_CO_CellMarker", "Label_GO_KEGG", "Label_MSigDB")
# Plot heatmaps for the selected signs.
<- "figures/figure_30_0070.png"
filename #png(file = filename, height = 600, width = 800, res = 120)
png(file = filename, height = 300, width = 400, res = 60)
set.seed(1)
<- "PBMC 4k"
title plot_multiheatmaps(ssm_list = ssm_list, gem_list = NULL,
ssmlabel_list = ssmlabel_list, gemlabel_list = NULL,
nrand_samples = 500, show_row_names = FALSE, title = title)
dev.off()
# Save data.
saveRDS(pbmcs, file = "backup/30_006_pbmcs4k_annotations.rds")
# Load data.
<- readRDS("backup/30_006_pbmcs4k_annotations.rds") pbmcs