.hdr <- "result/step1/final_matrix"
.data <- fileset.list(.hdr)
.hash.hdr <- "result/step1/hash"
.hash.data <- fileset.list(.hash.hdr)
.hash.info <- read.hash(.hash.data) %>%
dplyr::select(-hash)
.file <- "result/step2/bbknn.rds"
.mkdir(dirname(.file))
if.needed(.file, {
.cells <-
fread(.data$col, header=F, col.names = "tag") %>%
parse.tag()
.svd <- rcpp_mmutil_svd(.data$mtx, RANK=50, TAKE_LN=T, EM_ITER = 20, NUM_THREADS=16)
.bbknn <- rcpp_mmutil_bbknn(r_svd_v = .svd$V,
r_svd_u = .svd$U,
r_svd_d = .svd$D,
r_batches = .cells$batch,
knn = 50,
NUM_THREADS = 16,
USE_SINGULAR_VALUES = F)
saveRDS(.bbknn, .file)
})
.bbknn <- readRDS(.file)
.file <- "result/step2/annotation_bbknn.txt.gz"
if.needed(.file, {
.annot.out <-
rcpp_mmutil_annotate_columns(
pos_labels = list(p1=.pos.markers),
r_neg_labels = list(n1=.neg.markers),
r_U = .bbknn$U,
r_D = .bbknn$D,
r_V = .bbknn$factors.adjusted,
row_file = .data$row,
col_file = .data$col,
EM_TOL = 1e-8,
EM_ITER = 500,
TAKE_LN = F)
.col <- c("tag", "celltype", "prob", "ln.prob")
names(.annot.out$annotation) <- .col
annot.dt <- setDT(.annot.out$annotation) %>%
parse.tag()
fwrite(annot.dt, .file)
})
.file <- "Tab/step2_cell_type.txt.gz"
if.needed(.file, {
.cells <-
fread(.data$col, header=F, col.names = "tag") %>%
parse.tag()
.leiden <- run.leiden(.bbknn$knn.adj, .cells$tag, res=1, nrepeat = 100)
.tab <-
.leiden %>%
left_join(annot.dt) %>%
na.omit()
## identify problematic clusters
.fraction <- .tab[, .(.N), by = .(celltype, membership, component)]
.fraction[, Ntot := sum(`N`), by = .(membership, component)]
.mem.qc <- .fraction[order(`N`, decreasing = T), head(.SD, 1), by = .(membership, component)]
.remove <- .mem.qc[`N` / `Ntot` < .5, .(membership, component)]
## Map cell group membership to cell type
.ct.map <- .mem.qc[, .(membership, component, celltype)]
final.cell.type <- .leiden %>%
anti_join(.remove) %>%
na.omit() %>%
left_join(.ct.map) %>%
as.data.table() %>%
parse.tag()
fwrite(final.cell.type, .file)
})
final.cell.type <- fread(.file)
.file <- "Tab/step2_umap_coord.txt.gz"
if.needed(.file, {
.umap <- uwot::tumap(.bbknn$factors.adjusted,
learning_rate = .1,
n_epochs = 2000,
n_sgd_threads = 16,
init = "laplacian",
verbose = T,
init_sdev = .01,
scale = F)
.tags <- readLines(.data$col)
colnames(.umap) <- "UMAP" %&% 1:ncol(.umap)
umap.dt <- data.table(.umap, tag = .tags)
fwrite(umap.dt, .file)
})
umap.dt <- fread(.file) %>%
left_join(final.cell.type) %>%
left_join(.hash.info) %>%
na.omit()
.file <- "Tab/step2_tsne_coord.txt.gz"
if.needed(.file, {
.tsne <- Rtsne::Rtsne(.bbknn$factors.adjusted,
num_threads = 16,
verbose = T,
check_duplicates = F)
.tags <- readLines(.data$col)
colnames(.tsne$Y) <- "tSNE" %&% 1:ncol(.tsne$Y)
tsne.dt <- data.table(.tsne$Y, tag = .tags)
fwrite(tsne.dt, .file)
})
tsne.dt <- fread(.file) %>%
left_join(final.cell.type) %>%
left_join(.hash.info) %>%
na.omit()
p1 <- .gg.plot(umap.dt[sample(.N)], aes(UMAP1, UMAP2, color=celltype)) +
ggrastr::rasterise(geom_point(stroke = 0, size=1), dpi=300) +
theme(legend.position = c(0,0), legend.justification = c(0,0)) +
ggtitle("cell types") +
scale_color_brewer("", palette = "Paired")
p2 <- .gg.plot(umap.dt[sample(.N)], aes(UMAP1, UMAP2, color=as.factor(batch))) +
ggrastr::rasterise(geom_point(stroke = 0, size=1), dpi=300) +
theme(legend.position = c(0,0), legend.justification = c(0,0)) +
ggtitle("batch membership") +
scale_color_brewer("", palette = "Set3")
p3 <- .gg.plot(umap.dt[sample(.N)], aes(UMAP1, UMAP2, color=as.factor(membership))) +
ggrastr::rasterise(geom_point(stroke = 0, size=1), dpi=300) +
theme(legend.position = c(0,0), legend.justification = c(0,0)) +
ggtitle("clustering") +
scale_color_brewer("", palette = "Set1")
p4 <- .gg.plot(umap.dt[sample(.N)], aes(UMAP1, UMAP2, color=as.factor(disease))) +
ggrastr::rasterise(geom_point(stroke = 0, size=1), dpi=300) +
theme(legend.position = c(0,0), legend.justification = c(0,0)) +
ggtitle("clustering") +
scale_color_brewer("", palette = "Dark2")
plt <- p1 | p2 | p3 | p4
print(plt)
p1 <-
.gg.plot(tsne.dt[sample(.N)], aes(tSNE1, tSNE2, color=celltype)) +
ggrastr::rasterise(geom_point(stroke = 0, size=1), dpi=300) +
theme(legend.position = c(0,0), legend.justification = c(0,0)) +
ggtitle("cell types") +
scale_color_brewer("", palette = "Paired")
p2 <-
.gg.plot(tsne.dt[sample(.N)], aes(tSNE1, tSNE2, color=as.factor(batch))) +
ggrastr::rasterise(geom_point(stroke = 0, size=1), dpi=300) +
theme(legend.position = c(0,0), legend.justification = c(0,0)) +
ggtitle("batch membership") +
scale_color_brewer("", palette = "Set3")
p3 <-
.gg.plot(tsne.dt[sample(.N)], aes(tSNE1, tSNE2, color=as.factor(membership))) +
ggrastr::rasterise(geom_point(stroke = 0, size=1), dpi=300) +
theme(legend.position = c(0,0), legend.justification = c(0,0)) +
ggtitle("clustering") +
scale_color_brewer("", palette = "Set1")
p4 <-
.gg.plot(tsne.dt[sample(.N)], aes(tSNE1, tSNE2, color=disease)) +
ggrastr::rasterise(geom_point(stroke = 0, size=1), dpi=300) +
theme(legend.position = c(0,0), legend.justification = c(0,0)) +
ggtitle("clustering") +
scale_color_brewer("", palette = "Dark2")
plt <- p1 | p2 | p3 | p4
print(plt)
Two-dimensional density plot on the raw CD marker concentrations.
.ct <- c("mTreg","nTreg","mTconv","nTconv")
plt <- plt.scatter.ct.2(.ct, final.cell.type, marker.raw.mtx)
print(plt)
.markers <-
c("FOXP3", "ID3", "BACH2", "CXCR3", "PRDM1", "SGK1", "TCF7", "LEF1",
"SELL", "IL2RA", "IL7R", "IKZF2", "CCR6", "CCR4", "CCR7", "CTLA4",
"HLA-DRA", "CD25", "CD127", "CD183", "CD196",
"CD197", "CD194", "CD45RA", "CD45RO",
"HLA") %>%
unique
.markers <- sort(as.character(unique(marker.dt$marker)))
for(g in .markers){
plt <- plot.marker.scatter(g, show.tsne = F)
print(plt)
.file <- fig.dir %&% "/Fig_marker_" %&% g %&% "_umap.pdf"
.gg.save(filename = .file, plot = plt, width=3, height=5)
}
.markers <- sort(as.character(unique(marker.dt$marker)))
for(g in .markers){
plt <- plot.marker.scatter(g, show.tsne = T)
print(plt)
.file <- fig.dir %&% "/Fig_marker_" %&% g %&% "_tsne.pdf"
.gg.save(filename = .file, plot = plt, width=3, height=3)
}
prdm.dt <- fread("data/PRDM1/PRDM1_SL.csv.gz")
.dt <-
copy(final.cell.type) %>%
left_join(prdm.dt) %>%
left_join(umap.dt) %>%
left_join(tsne.dt) %>%
na.omit() %>%
as.data.table()
.dt[, membership := as.factor(`membership`)]
.aes <- aes(UMAP1, UMAP2, colour=pmin(PRDM1_short, 3))
p1 <-
ggplot(.dt[order(PRDM1_short)], .aes) +
xlab("UMAP 1") + ylab("UMAP 2") + prdm.thm +
ggrastr::rasterise(geom_point(stroke=0, size=1), dpi = 300) +
scale_colour_distiller("PRDM1\nshort",
palette="RdPu",
direction=1)
.aes <- aes(UMAP1, UMAP2, colour = pmin(PRDM1_long, 3))
p2 <-
ggplot(.dt[order(PRDM1_long)], .aes) +
xlab("UMAP 1") + ylab("UMAP 2") + prdm.thm +
ggrastr::rasterise(geom_point(stroke=0, size=1), dpi = 300) +
scale_colour_distiller("PRDM1\nlong",
palette="RdPu",
direction=1)
plt <- p1 | p2
print(plt)
.aes <- aes(tSNE1, tSNE2, colour=pmin(PRDM1_short, 3))
p1 <-
ggplot(.dt[order(PRDM1_short)], .aes) +
xlab("tSNE 1") + ylab("tSNE 2") + prdm.thm +
ggrastr::rasterise(geom_point(stroke=0, size=1), dpi = 300) +
scale_colour_distiller("PRDM1\nshort",
palette="RdPu",
direction=1)
.aes <- aes(tSNE1, tSNE2, colour = pmin(PRDM1_long, 3))
p2 <-
ggplot(.dt[order(PRDM1_long)], .aes) +
xlab("tSNE 1") + ylab("tSNE 2") + prdm.thm +
ggrastr::rasterise(geom_point(stroke=0, size=1), dpi = 300) +
scale_colour_distiller("PRDM1\nlong",
palette="RdPu",
direction=1)
plt <- p1 | p2
print(plt)