.fread <- function(x, ...) {
    .cmd <- ifelse(str_ends(x, "gz"), "gzip -cd ", "cat")
    fread(cmd = .cmd %&% " " %&% x %&% "| sed 's/  / NA /g'", ...)
}

.ggplot <- function(...) {

    ggplot(...) +
        theme_bw() +
        ggplot2::theme(plot.background = element_blank(),
                       plot.margin = unit(c(0,.5,0,.5), 'lines'),
                       strip.background = element_blank(),
                       strip.text = element_text(size=6),
                       legend.background = element_blank(),
                       legend.text = element_text(size = 8),
                       legend.title = element_text(size = 8),
                       axis.title = element_text(size = 8),
                       legend.key.width = unit(1, 'lines'),
                       legend.key.height = unit(.2, 'lines'),
                       legend.key.size = unit(1, 'lines'),
                       axis.line = element_line(color = 'gray20', size = .5),
                       axis.text = element_text(size = 6))
}

.read.v4 <- function(hdr) {
    .files <- list.files(hdr %&% "/summary/",
                         pattern = "eval.gz", full.names=TRUE)

    .ct <- c("character","character","character",
             "character","character", "integer",
             "double","double","double",
             "double","double","double",
             "double","double","double",
             "double","double")

    .dat <- lapply(.files, .fread, header=TRUE, fill=TRUE,
                   colClasses = .ct)

    .dat <- .dat %>%
        do.call(what=rbind) %>%
        mutate(p1 = as.numeric("0." %&% p1)) %>%
        mutate(pa = as.numeric("0." %&% pa)) %>%
        mutate(pf = as.numeric("0." %&% pf)) %>%
        mutate(p0 = as.numeric("0." %&% p0))
}

.read.named.v4 <- function(x) {
    xx <-
        str_remove(x, "sim_v4_") %>%
        str_remove_all("[NMS]") %>%
        str_split("[_B]") %>%
        unlist %>%
        as.integer

    names(xx) <- c("N","M","S","B")
    ret <- .read.v4(x) %>% as.data.table
    ret[, N := xx[1]]
    ret[, M := xx[2]]
    ret[, S := xx[3]]
    ret[, B := xx[4]]
    return(ret)
}

.file <- ".simulation.v4.rdata"
if(!file.exists(.file)) {

    result.dt <-
        list.files(".", "sim_v4") %>%
        lapply(FUN=.read.named.v4) %>%
        do.call(what=rbind)

    save(list="result.dt", file=.file)
} else {
    load(.file)
}
result.dt[, p1.lab := "Variance Caused by Disease: " %&% (p1*100) %&% "%"]
result.dt[, ncell.per.ind := factor(M, c(50, 20), c(50, 20) %&% " cells per individual")]

.method <- c("cocoa", "mu", "avg", "tot", "MAST", "cf")
.method.lab <- c("CoCoA", "Bayesian", "Mean", "Total", "MAST", "Confounder")

result.dt[, method := factor(method, .method, .method.lab)]
result.dt[, p1.lab := paste("Var. by Causal ", p1*100, "%")]
result.dt[, ncell.per.ind := factor(M, c(50, 20), c(50, 20) %&% " cells per individual")]

1. Total discovery rate when there were no causal gene

plot.edr <- function(.p0){
  .dt <- result.dt[p1 == 0 & pf == 0 & p0 == .p0]
  .aes <- aes(x = method, y = edr01, fill = method)
  .title <- "Variation by Confounding Effect = " %&% (100 * .p0) %&% "%"
  .ggplot(.dt, .aes) +
    ylab("Total Discovery Rate") +
    xlab("Methods") +
    ggtitle(.title) +
    facet_grid(ncell.per.ind ~ N) +
    scale_y_sqrt(breaks = c(1e-2, 0.1, 0.2, 0.5)) +
    scale_fill_brewer(palette="Paired") +
    scale_colour_brewer(palette="Paired") +
    theme(legend.position = "none") +
    theme(axis.text.x = element_text(angle=90, vjust=0, hjust=1)) +
    geom_hline(yintercept = .1, lty = 2, colour = 2, size = .5) +
    geom_hline(yintercept = .01, lty = 2, colour = 2, size = .5) +
    geom_jitter(height = 0, stroke = .1, pch = 21, size = 2, width = .1)
}

plt <- plot.edr(0)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_NoCausal_Conf0.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

plt <- plot.edr(.5)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_NoCausal_Conf50.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

2. Empirical False Discovery Rate adjusted by the `qvalue` method

plot.efdr <- function(.p1, .p0 = .5, .pf = 0) {

  .title <-
    "Variation by Disease Effect = " %&% (100 * .p1) %&% "%\n" %&%
    "Variation by Confounding Effect = " %&% (100 * .p0) %&% "%"

  .dt <- result.dt[p1 == .p1 & p0 == .p0 & pf == .pf]

  .aes <- aes(x = method, y = efdr01, fill = method)

  .ggplot(.dt, .aes) +
    ggtitle(.title) +
    ylab("Empirical FDR (when q-value < .01)") +
    xlab("Methods") +
    facet_grid(ncell.per.ind ~ N) +
    scale_y_sqrt(breaks = c(1e-2, 0.1, 0.2, 0.5)) +
    scale_fill_brewer(palette="Paired") +
    scale_colour_brewer(palette="Paired") +
    theme(legend.position = "none") +
    theme(axis.text.x = element_text(angle=90, vjust=0, hjust=1)) +
    geom_hline(yintercept = .1, lty = 2, colour = 2, size = .2) +
    geom_hline(yintercept = .01, lty = 2, colour = 2, size = .2) +
    geom_jitter(height = 0, stroke = .1, pch = 21, size = 2, width = .2)
}

When there were no effects that may confound gene expressions with disease labels

plt <- plot.efdr(.p1 = .1, .p0 = 0, .pf = 0)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease10_Conf0.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

plt <- plot.efdr(.p1 = .3, .p0 = 0, .pf = 0)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease30_Conf0.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

When there were effects confounding expressions with disease labels

plt <- plot.efdr(.p1 = .1, .p0 = .5, .pf = 0)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease10_Conf50.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

plt <- plot.efdr(.p1 = .3, .p0 = .5, .pf = 0)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease30_Conf50.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

When sequential ignorability breaks down, but there were no confounder

plt <- plot.efdr(.p1 = .1, .p0 = 0, .pf = .1)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease10_Conf0_Violating.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

plt <- plot.efdr(.p1 = .3, .p0 = 0, .pf = .1)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease30_Conf0_Violating.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

When sequential ignorability breaks down and there were strong confounding effects

plt <- plot.efdr(.p1 = .1, .p0 = .5, .pf = .1)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease10_Conf50_Violating.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

plt <- plot.efdr(.p1 = .3, .p0 = .5, .pf = .1)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease30_Conf50_Violating.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

Simulation results for the causal discovery of DEGs

Yongjin Park

1. Total discovery rate when there were no causal gene

2. Empirical False Discovery Rate adjusted by the `qvalue` method

When there were no effects that may confound gene expressions with disease labels

When there were effects confounding expressions with disease labels

When sequential ignorability breaks down, but there were no confounder

When sequential ignorability breaks down and there were strong confounding effects

Simulation results for the causal discovery of DEGs

Yongjin Park

1. Total discovery rate when there were no causal gene

2. Empirical False Discovery Rate adjusted by the qvalue method

When there were no effects that may confound gene expressions with disease labels

When there were effects confounding expressions with disease labels

When sequential ignorability breaks down, but there were no confounder

When sequential ignorability breaks down and there were strong confounding effects

2. Empirical False Discovery Rate adjusted by the `qvalue` method