MacBook Pro (Big Sur, 16-inch, 2019), Processor (2.4 GHz 8-Core Intel Core i9), Memory (64 GB 2667 MHz DDR4).
Attach necessary libraries:
library(ASURAT)
library(SingleCellExperiment)
library(SummarizedExperiment)
In this vignette, we create figures for the computational results of PBMC datasets.
Population ratios inferred by the existing methods and ASURAT are compared.
Load data.
<- readRDS("backup/04_006_pbmcs4k_annotation.rds")
pbmc_asurat <- readRDS("backup/04_011_pbmc4k_scran.rds")
pbmc_scran <- readRDS("backup/04_021_pbmc4k_seurat.rds")
pbmc_seurat <- readRDS("backup/04_022_pbmc4k_seurat_sccatch.rds")
pbmc_sccatch <- readRDS("backup/04_031_pbmc4k_monocle3.rds")
pbmc_monocle3 <- readRDS("backup/04_041_pbmc4k_sc3.rds")
pbmc_sc3
<- list(scran = pbmc_scran, Seurat = pbmc_seurat, scCATCH = pbmc_sccatch,
pbmcs Monocle3 = pbmc_monocle3, SC3 = pbmc_sc3, ASURAT = pbmc_asurat$CB)
The hypothetical results can be obtained from Cao et al., Front. Genet. (2020).
<- c("T", "Mono", "B", "NK/NKT", "Megakaryocyte", "DC", "Unspecified")
ct <- c(0.4226, 0.2707, 0.1509, 0.1546, 0.0012, 0, 0)
cr <- data.frame(cell_type = ct, ratio = cr, method = "SCSA") res
Prepare a data frame to be plotted.
<- c("B", "DC", "Megakaryocyte", "Mono", "NK/NKT", "T", "Unspecified")
cells for(i in seq_along(pbmcs)){
if(class(pbmcs[[i]]) == "SingleCellExperiment"){
<- as.data.frame(colData(pbmcs[[i]]))
df else if(class(pbmcs[[i]]) == "cell_data_set"){
}<- as.data.frame(colData(pbmcs[[i]]))
df else if(class(pbmcs[[i]]) == "Seurat"){
}<- pbmcs[[i]][[]]
df
}<- dplyr::group_by(df, cell_type)
df <- dplyr::summarise(df, ratio = dplyr::n())
df $ratio <- as.numeric(df$ratio) / sum(as.numeric(df$ratio))
df$method <- names(pbmcs)[i]
dffor(k in seq_along(cells)){
if(!(cells[k] %in% df$cell_type)){
<- data.frame(cell_type = cells[k], ratio = 0, method = names(pbmcs)[i])
tmp <- rbind(df, tmp)
df
}
}<- rbind(res, df)
res }
Change the names of cell types.
$cell_type[res$cell_type == "T"] <- "T cell"
res$cell_type[res$cell_type == "Mono"] <- "Monocyte"
res$cell_type[res$cell_type == "NK/NKT"] <- "NK or NKT cell"
res$cell_type[res$cell_type == "B"] <- "B cell"
res$cell_type[res$cell_type == "DC"] <- "Dendritic cell" res
Show the population ratios.
$method <- factor(res$method, levels = unique(res$method))
res<- length(unique(res$method))
n_methods <- c("black", scales::brewer_pal(palette = "Set1")(n_methods))
mycolors 7] <- mycolors[6] ; mycolors[6] <- "#FFFF33"
mycolors[
<- 0.55
ymax <- ggplot2::ggplot() +
p ::geom_bar(ggplot2::aes(x = res$cell_type, y = res$ratio,
ggplot2fill = as.factor(res$method)),
stat = "identity", width = 0.8,
position = ggplot2::position_dodge2(width = 1)) +
::theme_classic(base_size = 20, base_family = "Helvetica") +
ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(angle = 45)) +
ggplot2::ylim(0, ymax) +
ggplot2::labs(title = "PBMC 4k", x = "", y = "Population ratio", fill = "Method") +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 20),
ggplot2legend.position = "right") +
::scale_fill_manual(values = mycolors)
ggplot2<- "figures/figure_20_0010.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 11, height = 5) ggplot2
Load data.
<- readRDS("backup/05_011_pbmc6k_scran.rds")
pbmc_scran <- readRDS("backup/05_021_pbmc6k_seurat.rds")
pbmc_seurat <- readRDS("backup/05_022_pbmc6k_seurat_sccatch.rds")
pbmc_sccatch <- readRDS("backup/05_031_pbmc6k_monocle3.rds")
pbmc_monocle3 <- readRDS("backup/05_041_pbmc6k_sc3.rds")
pbmc_sc3 <- readRDS("backup/05_006_pbmcs6k_annotation.rds")
pbmc_asurat
<- list(scran = pbmc_scran, Seurat = pbmc_seurat, scCATCH = pbmc_sccatch,
pbmcs Monocle3 = pbmc_monocle3, SC3 = pbmc_sc3, ASURAT = pbmc_asurat$CB)
The hypothetical results can be obtained from Cao et al., Front. Genet. (2020).
<- c("T", "Mono", "B", "NK/NKT", "Megakaryocyte", "DC", "Unspecified")
ct <- c(0.4920, 0.2652, 0.1292, 0.1102, 0.0035, 0, 0)
cr <- data.frame(cell_type = ct, ratio = cr, method = "SCSA") res
Prepare a data frame to be plotted.
<- c("B", "DC", "Megakaryocyte", "Mono", "NK/NKT", "T", "Unspecified")
cells for(i in seq_along(pbmcs)){
if(class(pbmcs[[i]]) == "SingleCellExperiment"){
<- as.data.frame(colData(pbmcs[[i]]))
df else if(class(pbmcs[[i]]) == "cell_data_set"){
}<- as.data.frame(colData(pbmcs[[i]]))
df else if(class(pbmcs[[i]]) == "Seurat"){
}<- pbmcs[[i]][[]]
df
}<- dplyr::group_by(df, cell_type)
df <- dplyr::summarise(df, ratio = dplyr::n())
df $ratio <- as.numeric(df$ratio) / sum(as.numeric(df$ratio))
df$method <- names(pbmcs)[i]
dffor(k in seq_along(cells)){
if(!(cells[k] %in% df$cell_type)){
<- data.frame(cell_type = cells[k], ratio = 0, method = names(pbmcs)[i])
tmp <- rbind(df, tmp)
df
}
}<- rbind(res, df)
res }
Change the names of cell types.
$cell_type[res$cell_type == "T"] <- "T cell"
res$cell_type[res$cell_type == "Mono"] <- "Monocyte"
res$cell_type[res$cell_type == "NK/NKT"] <- "NK or NKT cell"
res$cell_type[res$cell_type == "B"] <- "B cell"
res$cell_type[res$cell_type == "DC"] <- "Dendritic cell" res
Show the population ratios.
$method <- factor(res$method, levels = unique(res$method))
res<- length(unique(res$method))
n_methods <- c("black", scales::brewer_pal(palette = "Set1")(n_methods))
mycolors 7] <- mycolors[6] ; mycolors[6] <- "#FFFF33"
mycolors[
<- 0.68
ymax <- ggplot2::ggplot() +
p ::geom_bar(ggplot2::aes(x = res$cell_type, y = res$ratio,
ggplot2fill = as.factor(res$method)),
stat = "identity", width = 0.8,
position = ggplot2::position_dodge2(width = 1)) +
::theme_classic(base_size = 20, base_family = "Helvetica") +
ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(angle = 45)) +
ggplot2::ylim(0, ymax) +
ggplot2::labs(title = "PBMC 6k", x = "", y = "Population ratio",
ggplot2fill = "Method") +
::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 20),
ggplot2legend.position = "right") +
::scale_fill_manual(values = mycolors)
ggplot2<- "figures/figure_20_0020.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 11, height = 5) ggplot2
Load data.
<- readRDS("backup/06_011_pbmc130k_scran.rds")
pbmc_scran <- readRDS("backup/06_021_pbmc130k_seurat.rds")
pbmc_seurat <- readRDS("backup/06_022_pbmc130k_seurat_sccatch.rds")
pbmc_sccatch <- readRDS("backup/06_031_pbmc130k_monocle3.rds")
pbmc_monocle3 #pbmc_sc3 <- readRDS("backup/06_041_pbmc130k_sc3.rds")
<- readRDS("backup/06_006_pbmcs130k_annotation.rds")
pbmc_asurat
<- list(scran = pbmc_scran, Seurat = pbmc_seurat, scCATCH = pbmc_sccatch,
pbmcs Monocle3 = pbmc_monocle3, ASURAT = pbmc_asurat$CB)
Load the sample information reported in previous paper (Reyes et al., 2020).
<- read.delim("rawdata/2020_001_Reyes/SCP548/metadata/scp_meta.txt")
info <- info[-1, ] ; rownames(info) <- info$NAME ; info <- info[, -1]
info which(info$Cell_Type == "NK"), ]$Cell_Type <- "NK/NKT"
info[<- dplyr::group_by(info, Cell_Type)
tmp <- dplyr::summarise(tmp, ratio = dplyr::n())
tmp $ratio <- as.numeric(tmp$ratio) / sum(as.numeric(tmp$ratio))
tmp<- data.frame(cell_type = tmp$Cell_Type, ratio = tmp$ratio, method = "Reyes_2020")
df <- rbind(df, c("Unspecified", 0, "Reyes_2020")) res
Prepare a data frame to be plotted.
<- c("B", "DC", "Megakaryocyte", "Mono", "NK/NKT", "T", "Unspecified")
cells for(i in seq_along(pbmcs)){
if(class(pbmcs[[i]]) == "SingleCellExperiment"){
<- as.data.frame(colData(pbmcs[[i]]))
df else if(class(pbmcs[[i]]) == "cell_data_set"){
}<- as.data.frame(colData(pbmcs[[i]]))
df else if(class(pbmcs[[i]]) == "Seurat"){
}<- pbmcs[[i]][[]]
df
}<- dplyr::group_by(df, cell_type)
df <- dplyr::summarise(df, ratio = dplyr::n())
df $ratio <- as.numeric(df$ratio) / sum(as.numeric(df$ratio))
df$method <- names(pbmcs)[i]
dffor(k in seq_along(cells)){
if(!(cells[k] %in% df$cell_type)){
<- data.frame(cell_type = cells[k], ratio = 0, method = names(pbmcs)[i])
tmp <- rbind(df, tmp)
df
}
}<- rbind(res, df)
res
}$ratio <- as.numeric(res$ratio) res
Change the names of cell types.
$cell_type[res$cell_type == "T"] <- "T cell"
res$cell_type[res$cell_type == "Mono"] <- "Monocyte"
res$cell_type[res$cell_type == "NK/NKT"] <- "NK or NKT cell"
res$cell_type[res$cell_type == "B"] <- "B cell"
res$cell_type[res$cell_type == "DC"] <- "Dendritic cell" res
Show the population ratios.
$method <- factor(res$method, levels = unique(res$method))
res<- length(unique(res$method))
n_methods <- c("black", scales::brewer_pal(palette = "Set1")(n_methods + 1))
mycolors 7] <- mycolors[6]
mycolors[
<- 0.65
ymax <- ggplot2::ggplot() +
p ::geom_bar(ggplot2::aes(x = res$cell_type, y = res$ratio,
ggplot2fill = as.factor(res$method)),
stat = "identity", width = 0.8,
position = ggplot2::position_dodge2(width = 1)) +
::theme_classic(base_size = 20, base_family = "Helvetica") +
ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(angle = 45)) +
ggplot2::ylim(0, ymax) +
ggplot2::labs(title = "Reyes 2020", x = "", y = "Population ratio",
ggplot2fill = "Method") +
::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 20),
ggplot2legend.position = "right") +
::scale_fill_manual(values = mycolors)
ggplot2<- "figures/figure_20_0030.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 11, height = 5) ggplot2
ASURAT includes multiple parameters for creating signs, such as
min_ngenes
and max_ngenes
in
remove_signs()
th_posi
and th_nega
in
cluster_genesets()
min_cnt_strg
and min_cnt_vari
in
create_signs()
threshold
and keep_rareID
in
remove_signs_redundant()
(only for ontology databases)keywords
in remove_signs_manually()
However, the most crucial parameters for sample (cell) clustering are
th_posi
and th_nega
, as well as
min_cnt_strg
and min_cnt_vari
in some
cases.
Here, an exhaustive parameter searching is demonstrated for PBMC 4k dataset. This is time-consuming but helpful for obtaining interpretable results.
load data.
<- readRDS("backup/04_003_pbmc4k_normalized.rds")
pbmc <- readRDS("backup/04_003_pbmc4k_cormat.rds") cormat
Load databases.
<- "https://github.com/keita-iida/ASURATDB/blob/main/genes2bioterm/"
urlpath load(url(paste0(urlpath, "20201213_human_CO.rda?raw=TRUE"))) # CO
load(url(paste0(urlpath, "20220308_human_MSigDB.rda?raw=TRUE"))) # MSigDB
load(url(paste0(urlpath, "20220308_human_CellMarker.rda?raw=TRUE"))) # CellMarker
Create a custom-built cell type-related databases by combining different databases for analyzing human single-cell transcriptome data.
<- list(human_CO[["cell"]], human_MSigDB[["cell"]], human_CellMarker[["cell"]])
d <- list(cell = do.call("rbind", d)) human_CB
Add formatted databases to metadata(sce)$sign
.
<- list(CB = pbmc)
pbmcs metadata(pbmcs$CB) <- list(sign = human_CB[["cell"]])
Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = pbmcs$CB, min_ngenes = 2, max_ngenes = 1000) pbmcs
Perform an exhausting parameter search.
<- "figures_pbmc4k_cell"
dirname dir.create(dirname)
<- seq(0.20, 0.40, length = 3)
th_posi <- -seq(0.20, 0.40, length = 3)
th_nega
<- 1
cnt for(i in seq_along(th_posi)){
for(j in seq_along(th_nega)){
# Create signs and sign-by-sample matrices.
set.seed(1)
<- cluster_genesets(sce = pbmcs$CB, cormat = cormat,
sce th_posi = th_posi[i], th_nega = th_nega[j])
<- create_signs(sce = sce, min_cnt_strg = 4, min_cnt_vari = 4)
sce # simmat <- human_GO$similarity_matrix$BP
# sce <- remove_signs_redundant(sce = sce, similarity_matrix = simmat,
# threshold = 0.85, keep_rareID = TRUE)
# keywords <- "Covid|COVID"
# sce <- remove_signs_manually(sce = sce, keywords = keywords)
<- makeSignMatrix(sce = sce, weight_strg = 0.5, weight_vari = 0.5)
sce # Cluster cells.
<- Seurat::as.Seurat(sce, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(30))
surt <- Seurat::FindClusters(surt, resolution = 0.11)
surt <- Seurat::as.SingleCellExperiment(surt)
temp <- colData(temp)$seurat_clusters
labels # Reduce dimension.
set.seed(1)
<- t(as.matrix(assay(sce, "counts")))
mat <- umap::umap(mat, n_components = 2)
res <- as.data.frame(res[["layout"]])
df = paste("alpha = ", format(th_posi[i], nsmall = 2), sep = "")
t1 = paste("beta = ", format(th_nega[j], nsmall = 2), sep = "")
t2 <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 0.5) +
::labs(title = "PBMC 4k", x = "UMAP_1", y = "UMAP_2") +
ggplot2# ggplot2::annotate("text", x = -Inf, y = Inf, label = t1, hjust = -0.16,
# vjust = 1.5, size = 4, colour = "red") +
# ggplot2::annotate("text", x = -Inf, y = Inf, label = t2, hjust = -0.17,
# vjust = 3.2, size = 4, colour = "red") +
::theme_classic(base_size = 18) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- paste0(dirname, "/", sprintf("figure_04_%04d.png", cnt))
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 4.8, height = 4)
ggplot2<- cnt + 1
cnt
} }
To investigate the dependency of ASURAT on the number of cells, benchmark ASURAT performance by randomly downsampling cells.
load data.
<- readRDS("backup/04_003_pbmc4k_normalized.rds") pbmc
Load databases.
<- "https://github.com/keita-iida/ASURATDB/blob/main/genes2bioterm/"
urlpath load(url(paste0(urlpath, "20201213_human_CO.rda?raw=TRUE"))) # CO
load(url(paste0(urlpath, "20220308_human_MSigDB.rda?raw=TRUE"))) # MSigDB
load(url(paste0(urlpath, "20220308_human_CellMarker.rda?raw=TRUE"))) # CellMarker
Create a custom-built cell type-related databases by combining different databases for analyzing human single-cell transcriptome data.
<- list(human_CO[["cell"]], human_MSigDB[["cell"]], human_CellMarker[["cell"]])
d <- list(cell = do.call("rbind", d)) human_CB
Perform random sampling of 4000 cells.
set.seed(1)
<- sample(dim(pbmc)[2], 4000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(40))
surt <- Seurat::FindClusters(surt, resolution = 0.15)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in low-dimensional spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 4000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0100.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # TRAC (p_val_adj ~0), EEF1A1 (p_val_adj ~0)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.99)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.98)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/20_001_pbmc4000_asurat.rds")
# Load data.
<- readRDS("backup/20_001_pbmc4000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(40))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.05)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 4000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0105.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~0), EEF1A1 (p_val_adj ~0)
# 1: Monocyte # S100A8 (p_val_adj ~0), S100A9 (p_val_adj ~0)
# 2: NK/NKT # NKG7 (p_val_adj ~0), GZMA (p_val_adj ~0)
# 3: B cell # CD79A (p_val_adj ~0), CD79B (p_val_adj ~0)
# 4: Dendritic cell # LILRA4 (p_val_adj ~e-271), LILRB4 (p_val_adj ~e-116)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/20_001_pbmc4000_seurat.rds")
# Load data.
<- readRDS("backup/20_001_pbmc4000_seurat.rds") surt
Perform random sampling of 3000 cells.
set.seed(1)
<- sample(dim(pbmc)[2], 3000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(40))
surt <- Seurat::FindClusters(surt, resolution = 0.10)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 3000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0110.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # EEF1A1 (p_val_adj ~e-259), TRAC (p_val_adj ~e-243)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.99)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.98)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/20_002_pbmc3000_asurat.rds")
# Load data.
<- readRDS("backup/20_002_pbmc3000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(40))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.05)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 3000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0115.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~e-270), EEF1A1 (p_val_adj ~e-239)
# 1: Monocyte # S100A8 (p_val_adj ~0), S100A9 (p_val_adj ~0)
# 2: NK/NKT # NKG7 (p_val_adj ~0), GZMA (p_val_adj ~0)
# 3: B cell # CD79A (p_val_adj ~0), CD79B (p_val_adj ~0)
# 4: Dendritic cell # LILRA4 (p_val_adj ~e-204), LILRB4 (p_val_adj ~e-102)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/20_002_pbmc3000_seurat.rds")
# Load data.
<- readRDS("backup/20_002_pbmc3000_seurat.rds") surt
Perform random sampling of 2000 cells.
set.seed(1)
<- sample(dim(pbmc)[2], 2000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(10))
surt <- Seurat::FindClusters(surt, resolution = 0.13)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 2000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0120.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # EEF1A1 (p_val_adj ~e-178), TRAC (p_val_adj ~e-156)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.97)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.98)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/20_003_pbmc2000_asurat.rds")
# Load data.
<- readRDS("backup/20_003_pbmc2000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(10))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.06)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 2000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0125.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~e-177), EEF1A1 (p_val_adj ~e-158)
# 1: Monocyte # FCN1 (p_val_adj ~0), MNDA (p_val_adj ~0)
# 2: NK/NKT # GZMA (p_val_adj ~e-238), NKG7 (p_val_adj ~e-220)
# 3: B cell # CD79A (p_val_adj ~e-302), MS4A1 (p_val_adj ~e-300)
# 4: Dendritic cell? # LILRA4 (p_val_adj ~e-129)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/20_003_pbmc2000_seurat.rds")
# Load data.
<- readRDS("backup/20_003_pbmc2000_seurat.rds") surt
Perform random sampling of 1500 cells.
set.seed(1)
<- sample(dim(pbmc)[2], 1500, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(50))
surt <- Seurat::FindClusters(surt, resolution = 0.40)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 1500 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0130.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # EEF1A1 (p_val_adj ~e-140), TRAC (p_val_adj ~e-121)
# 1: Monocyte # MSigDBID:20-S (...KUPFFER_CELLS_5) (sepI ~0.96)
# 2: NK/NKT cell # MSigDBID:1-V (...NK_NKT_CELLS_5) (sepI ~0.97)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.95)
# 4: Monocyte # MSigDBID:65-S (...MACROPHAGES) (sepI ~0.96)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "Mono"
# Save data.
saveRDS(sce, file = "backup/20_004_pbmc1500_asurat.rds")
# Load data.
<- readRDS("backup/20_004_pbmc1500_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(50))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.10)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 1500 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0135.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~e-138), EEF1A1 (p_val_adj ~e-121)
# 1: Monocyte # MNDA (p_val_adj ~e-228), FCN1 (p_val_adj ~e-217)
# 2: NK/NKT # GZMA (p_val_adj ~e-173), NKG7 (p_val_adj ~e-164)
# 3: B cell # CD79A (p_val_adj ~e-231), MS4A1 (p_val_adj ~e-226)
# 4: Dendritic cell? # FCER1A (p_val_adj ~e-176)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/20_004_pbmc1500_seurat.rds")
# Load data.
<- readRDS("backup/20_004_pbmc1500_seurat.rds") surt
Perform random sampling of 1000 cells.
set.seed(1)
<- sample(dim(pbmc)[2], 1000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(50))
surt <- Seurat::FindClusters(surt, resolution = 0.50)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 1000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0140.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: Unspecified # No significant genes are detected.
# 1: Monocyte # MSigDBID:15-S (...KUPFFER_CELLS_4) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.93)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.95)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "Unspecified"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
# Save data.
saveRDS(sce, file = "backup/20_005_pbmc1000_asurat.rds")
# Load data.
<- readRDS("backup/20_005_pbmc1000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(50))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.12)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 1000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_20_0145.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: Unspecified # No significant genes are detected.
# 1: Monocyte # MNDA (p_val_adj ~e-167), FCN1 (p_val_adj ~e-161)
# 2: NK/NKT # GZMA (p_val_adj ~e-113), NKG7 (p_val_adj ~e-111)
# 3: B cell # CD79A (p_val_adj ~e-144), MS4A1 (p_val_adj ~e-132)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "Unspecified"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt# Save data.
saveRDS(surt, file = "backup/20_005_pbmc1000_seurat.rds")
# Load data.
<- readRDS("backup/20_005_pbmc1000_seurat.rds") surt
load data.
<- readRDS("backup/04_003_pbmc4k_normalized.rds") pbmc
Load databases.
<- "https://github.com/keita-iida/ASURATDB/blob/main/genes2bioterm/"
urlpath load(url(paste0(urlpath, "20201213_human_CO.rda?raw=TRUE"))) # CO
load(url(paste0(urlpath, "20220308_human_MSigDB.rda?raw=TRUE"))) # MSigDB
load(url(paste0(urlpath, "20220308_human_CellMarker.rda?raw=TRUE"))) # CellMarker
Create a custom-built cell type-related databases by combining different databases for analyzing human single-cell transcriptome data.
<- list(human_CO[["cell"]], human_MSigDB[["cell"]], human_CellMarker[["cell"]])
d <- list(cell = do.call("rbind", d)) human_CB
Perform random sampling of 4000 cells.
set.seed(2)
<- sample(dim(pbmc)[2], 4000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(40))
surt <- Seurat::FindClusters(surt, resolution = 0.15)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 4000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0100.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # TRAC (p_val_adj ~0), EEF1A1 (p_val_adj ~0)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.99)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.98)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/21_001_pbmc4000_asurat.rds")
# Load data.
<- readRDS("backup/21_001_pbmc4000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(40))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.05)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 4000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0105.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~0), EEF1A1 (p_val_adj ~0)
# 1: Monocyte # S100A8 (p_val_adj ~0), S100A9 (p_val_adj ~0)
# 2: NK/NKT # NKG7 (p_val_adj ~0), GZMA (p_val_adj ~0)
# 3: B cell # CD79A (p_val_adj ~0), CD79B (p_val_adj ~0)
# 4: Dendritic cell # LILRA4 (p_val_adj ~e-266), LILRB4 (p_val_adj ~e-113)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/21_001_pbmc4000_seurat.rds")
# Load data.
<- readRDS("backup/21_001_pbmc4000_seurat.rds") surt
Perform random sampling of 3000 cells.
set.seed(2)
<- sample(dim(pbmc)[2], 3000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(40))
surt <- Seurat::FindClusters(surt, resolution = 0.12)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 3000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0110.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # EEF1A1 (p_val_adj ~e-266), TRAC (p_val_adj ~e-225)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.98)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.97)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/21_002_pbmc3000_asurat.rds")
# Load data.
<- readRDS("backup/21_002_pbmc3000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(50))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.06)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 3000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0115.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: NK/NKT # GZMA (p_val_adj ~e-268), NKG7 (p_val_adj ~e-162)
# 1: T cell # EEF1A1 (p_val_adj ~e-164), TRAC (p_val_adj ~e-126)
# 2: Monocyte # S100A8 (p_val_adj ~0), S100A9 (p_val_adj ~0)
# 3: B cell # CD79A (p_val_adj ~0), CD79B (p_val_adj ~0)
# 4: Dendritic cell # LILRA4 (p_val_adj ~e-178)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "NK/NKT"
surt$cell_type[surt$cell_type == 1] <- "T"
surt$cell_type[surt$cell_type == 2] <- "Mono"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/21_002_pbmc3000_seurat.rds")
# Load data.
<- readRDS("backup/21_002_pbmc3000_seurat.rds") surt
Perform random sampling of 2000 cells.
set.seed(2)
<- sample(dim(pbmc)[2], 2000, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(10))
surt <- Seurat::FindClusters(surt, resolution = 0.18)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 2000 cells (cell type)", x = "tSNE_1", y = "tSNE_2",
ggplot2color = "") +
::theme_classic(base_size = 20) + ggplot2::scale_colour_hue() +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0120.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Investigate significant signs.
set.seed(1)
<- colData(sce$CB)$seurat_clusters
labels $CB <- compute_sepI_all(sce = sce$CB, labels = labels, nrand_samples = NULL)
sce# Investigate significant genes.
set.seed(1)
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(altExp(sce$CB), "counts"))
mat "GEM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "GEM"
Seurat<- Seurat::SetIdent(surt, value = "seurat_clusters")
surt <- Seurat::FindAllMarkers(surt, only.pos = TRUE,
res min.pct = 0.25, logfc.threshold = 0.25)
metadata(sce$CB)$marker_genes$all <- res
View(metadata(sce$CB)$marker_signs$all)
View(metadata(sce$CB)$marker_genes$all)
# Infer cell types.
# 0: T cell # EEF1A1 (p_val_adj ~e-169), TRAC (p_val_adj ~e-149)
# 1: Monocyte # MSigDBID:13-S (...KUPFFER_CELLS_3) (sepI ~0.99)
# 2: NK/NKT # MSigDBID:1-V (...NK_NKT_CELLS_1) (sepI ~0.97)
# 3: B cell # MSigDBID:23-V (...B_CELLS) (sepI ~0.94)
# 4: Dendritic cell # CellMarkerID:72-S (Dendritic cell) (sepI ~0.97)
colData(sce$CB)$cell_type <- as.character(colData(sce$CB)$seurat_clusters)
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 0] <- "T"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 1] <- "Mono"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 2] <- "NK/NKT"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 3] <- "B"
colData(sce$CB)$cell_type[colData(sce$CB)$cell_type == 4] <- "DC"
# Save data.
saveRDS(sce, file = "backup/21_003_pbmc2000_asurat.rds")
# Load data.
<- readRDS("backup/21_003_pbmc2000_asurat.rds") sce
Run Seurat protocols.
# Create Seurat objects.
<- Seurat::CreateSeuratObject(counts = as.matrix(assay(subpbmc, "counts")),
surt project = "PBMC")
# Normalization
<- Seurat::NormalizeData(surt, normalization.method = "LogNormalize")
surt # Variance stabilizing transform
<- round(0.9 * ncol(surt))
n <- Seurat::FindVariableFeatures(surt, selection.method = "vst", nfeatures = n)
surt # Scale data
<- Seurat::ScaleData(surt)
surt # Principal component analysis
<- Seurat::RunPCA(surt, features = Seurat::VariableFeatures(surt))
surt # Create k-nearest neighbor graph.
<- Seurat::FindNeighbors(surt, reduction = "pca", dim = seq_len(10))
surt # Cluster cells.
<- Seurat::FindClusters(surt, resolution = 0.06)
surt # Run t-SNE.
<- Seurat::RunTSNE(surt, dims.use = seq_len(2), reduction = "pca",
surt dims = seq_len(pc), do.fast = FALSE, perplexity = 30)
# Show the clustering results.
<- "PBMC 2000 cells (Seurat)"
title <- surt$seurat_clusters
labels <- surt@reductions[["tsne"]]@cell.embeddings
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = title, x = "tSNE_1", y = "tSNE_2", color = "") +
ggplot2::theme_classic(base_size = 20) +
ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 18)) +
ggplot2::scale_colour_hue() +
ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 4)))
ggplot2<- "figures/figure_21_0125.png"
filename ::ggsave(file = filename, plot = p, dpi = 50, width = 5.1, height = 4.3)
ggplot2# Find differentially expressed genes.
@misc$stat <- Seurat::FindAllMarkers(surt, only.pos = TRUE, min.pct = 0.25,
surtlogfc.threshold = 0.25)
View(surt@misc$stat[which(surt@misc$stat$p_val_adj < 10^(-100)), ])
# Infer cell types
# 0: T cell # TRAC (p_val_adj ~e-164), EEF1A1 (p_val_adj ~e-152)
# 1: Monocyte # FCN1 (p_val_adj ~0), MNDA (p_val_adj ~0)
# 2: NK/NKT # GZMA (p_val_adj ~e-241), NKG7 (p_val_adj ~e-225)
# 3: B cell # CD79A (p_val_adj ~e-304), MS4A1 (p_val_adj ~e-303)
# 4: Dendritic cell? # LILRA4 (p_val_adj ~e-146)
<- as.integer(as.character(surt$seurat_clusters))
tmp $cell_type <- tmp
surt$cell_type[surt$cell_type == 0] <- "T"
surt$cell_type[surt$cell_type == 1] <- "Mono"
surt$cell_type[surt$cell_type == 2] <- "NK/NKT"
surt$cell_type[surt$cell_type == 3] <- "B"
surt$cell_type[surt$cell_type == 4] <- "DC"
surt# Save data.
saveRDS(surt, file = "backup/21_003_pbmc2000_seurat.rds")
# Load data.
<- readRDS("backup/21_003_pbmc2000_seurat.rds") surt
Perform random sampling of 1500 cells.
set.seed(2)
<- sample(dim(pbmc)[2], 1500, prob = NULL, replace = FALSE)
inds <- pbmc[, inds] subpbmc
Compute a correlation matrix.
<- t(as.matrix(assay(subpbmc, "centered")))
mat <- cor(mat, method = "spearman") cormat
Run ASURAT protocols.
# Add formatted databases.
<- list(CB = subpbmc)
sce metadata(sce$CB) <- list(sign = human_CB[["cell"]])
# Remove biological terms including too few or too many genes.
$CB <- remove_signs(sce = sce$CB, min_ngenes = 2, max_ngenes = 1000)
sce# Create signs and sign-by-sample matrices.
set.seed(1)
$CB <- cluster_genesets(sce = sce$CB, cormat = cormat,
sceth_posi = 0.2, th_nega = -0.4)
$CB <- create_signs(sce = sce$CB, min_cnt_strg = 4, min_cnt_vari = 4)
sce$CB <- makeSignMatrix(sce = sce$CB, weight_strg = 0.5, weight_vari = 0.5)
sce# Perform dimensional reduction.
set.seed(1)
<- t(as.matrix(assay(sce$CB, "counts")))
mat <- Rtsne::Rtsne(mat, dim = 2, pca = TRUE, initial_dims = 50)
res reducedDim(sce$CB, "TSNE") <- res[["Y"]]
# Perform unsupervised clustering of cells.
<- Seurat::as.Seurat(sce$CB, counts = "counts", data = "counts")
surt <- as.matrix(assay(sce$CB, "counts"))
mat "SSM"]] <- Seurat::CreateAssayObject(counts = mat)
surt[[::DefaultAssay(surt) <- "SSM"
Seurat<- Seurat::ScaleData(surt, features = rownames(surt))
surt <- Seurat::RunPCA(surt, features = rownames(surt))
surt <- Seurat::FindNeighbors(surt, reduction = "pca", dims = seq_len(20))
surt <- Seurat::FindClusters(surt, resolution = 0.30)
surt <- Seurat::as.SingleCellExperiment(surt)
temp colData(sce$CB)$seurat_clusters <- colData(temp)$seurat_clusters
# Show the clustering results in t-SNE spaces.
<- colData(sce$CB)$seurat_clusters
labels <- as.data.frame(reducedDim(sce$CB, "TSNE"))
df <- ggplot2::ggplot() +
p ::geom_point(ggplot2::aes(x = df[, 1], y = df[, 2], color = labels),
ggplot2size = 1, alpha = 1) +
::labs(title = "PBMC 1500 cells (cell type)", ggplot2