.fread <- function(x, ...) {
    .cmd <- ifelse(str_ends(x, "gz"), "gzip -cd ", "cat")
    fread(cmd = .cmd %&% " " %&% x %&% "| sed 's/  / NA /g'", ...)
}

.ggplot <- function(...) {

    ggplot(...) +
        theme_bw() +
        ggplot2::theme(plot.background = element_blank(),
                       plot.margin = unit(c(0,.5,0,.5), 'lines'),
                       strip.background = element_blank(),
                       strip.text = element_text(size=6),
                       legend.background = element_blank(),
                       legend.text = element_text(size = 8),
                       legend.title = element_text(size = 8),
                       axis.title = element_text(size = 8),
                       legend.key.width = unit(1, 'lines'),
                       legend.key.height = unit(.2, 'lines'),
                       legend.key.size = unit(1, 'lines'),
                       axis.line = element_line(color = 'gray20', size = .5),
                       axis.text = element_text(size = 6))
}
.read.v4 <- function(hdr) {
    .files <- list.files(hdr %&% "/summary/",
                         pattern = "eval.gz", full.names=TRUE)

    .ct <- c("character","character","character",
             "character","character", "integer",
             "double","double","double",
             "double","double","double",
             "double","double","double",
             "double","double")

    .dat <- lapply(.files, .fread, header=TRUE, fill=TRUE,
                   colClasses = .ct)

    .dat <- .dat %>%
        do.call(what=rbind) %>%
        mutate(p1 = as.numeric("0." %&% p1)) %>%
        mutate(pa = as.numeric("0." %&% pa)) %>%
        mutate(pf = as.numeric("0." %&% pf)) %>%
        mutate(p0 = as.numeric("0." %&% p0))
}

.read.named.v4 <- function(x) {
    xx <-
        str_remove(x, "sim_v4_") %>%
        str_remove_all("[NMS]") %>%
        str_split("[_B]") %>%
        unlist %>%
        as.integer

    names(xx) <- c("N","M","S","B")
    ret <- .read.v4(x) %>% as.data.table
    ret[, N := xx[1]]
    ret[, M := xx[2]]
    ret[, S := xx[3]]
    ret[, B := xx[4]]
    return(ret)
}
.file <- ".simulation.v4.rdata"
if(!file.exists(.file)) {

    result.dt <-
        list.files(".", "sim_v4") %>%
        lapply(FUN=.read.named.v4) %>%
        do.call(what=rbind)

    save(list="result.dt", file=.file)
} else {
    load(.file)
}
result.dt[, p1.lab := "Variance Caused by Disease: " %&% (p1*100) %&% "%"]
result.dt[, ncell.per.ind := factor(M, c(50, 20), c(50, 20) %&% " cells per individual")]

.method <- c("cocoa", "mu", "avg", "tot", "MAST", "cf")
.method.lab <- c("CoCoA", "Bayesian", "Mean", "Total", "MAST", "Confounder")

result.dt[, method := factor(method, .method, .method.lab)]
result.dt[, p1.lab := paste("Var. by Causal ", p1*100, "%")]
result.dt[, ncell.per.ind := factor(M, c(50, 20), c(50, 20) %&% " cells per individual")]

1. Total discovery rate when there were no causal gene

plot.edr <- function(.p0){
  .dt <- result.dt[p1 == 0 & pf == 0 & p0 == .p0]
  .aes <- aes(x = method, y = edr01, fill = method)
  .title <- "Variation by Confounding Effect = " %&% (100 * .p0) %&% "%"
  .ggplot(.dt, .aes) +
    ylab("Total Discovery Rate") +
    xlab("Methods") +
    ggtitle(.title) +
    facet_grid(ncell.per.ind ~ N) +
    scale_y_sqrt(breaks = c(1e-2, 0.1, 0.2, 0.5)) +
    scale_fill_brewer(palette="Paired") +
    scale_colour_brewer(palette="Paired") +
    theme(legend.position = "none") +
    theme(axis.text.x = element_text(angle=90, vjust=0, hjust=1)) +
    geom_hline(yintercept = .1, lty = 2, colour = 2, size = .5) +
    geom_hline(yintercept = .01, lty = 2, colour = 2, size = .5) +
    geom_jitter(height = 0, stroke = .1, pch = 21, size = 2, width = .1)
}
plt <- plot.edr(0)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_NoCausal_Conf0.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

plt <- plot.edr(.5)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_NoCausal_Conf50.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

2. Empirical False Discovery Rate adjusted by the qvalue method

plot.efdr <- function(.p1, .p0 = .5, .pf = 0) {

  .title <-
    "Variation by Disease Effect = " %&% (100 * .p1) %&% "%\n" %&%
    "Variation by Confounding Effect = " %&% (100 * .p0) %&% "%"

  .dt <- result.dt[p1 == .p1 & p0 == .p0 & pf == .pf]

  .aes <- aes(x = method, y = efdr01, fill = method)

  .ggplot(.dt, .aes) +
    ggtitle(.title) +
    ylab("Empirical FDR (when q-value < .01)") +
    xlab("Methods") +
    facet_grid(ncell.per.ind ~ N) +
    scale_y_sqrt(breaks = c(1e-2, 0.1, 0.2, 0.5)) +
    scale_fill_brewer(palette="Paired") +
    scale_colour_brewer(palette="Paired") +
    theme(legend.position = "none") +
    theme(axis.text.x = element_text(angle=90, vjust=0, hjust=1)) +
    geom_hline(yintercept = .1, lty = 2, colour = 2, size = .2) +
    geom_hline(yintercept = .01, lty = 2, colour = 2, size = .2) +
    geom_jitter(height = 0, stroke = .1, pch = 21, size = 2, width = .2)
}

When there were no effects that may confound gene expressions with disease labels

plt <- plot.efdr(.p1 = .1, .p0 = 0, .pf = 0)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease10_Conf0.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

plt <- plot.efdr(.p1 = .3, .p0 = 0, .pf = 0)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease30_Conf0.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

When there were effects confounding expressions with disease labels

plt <- plot.efdr(.p1 = .1, .p0 = .5, .pf = 0)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease10_Conf50.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

plt <- plot.efdr(.p1 = .3, .p0 = .5, .pf = 0)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease30_Conf50.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

When sequential ignorability breaks down, but there were no confounder

plt <- plot.efdr(.p1 = .1, .p0 = 0, .pf = .1)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease10_Conf0_Violating.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

plt <- plot.efdr(.p1 = .3, .p0 = 0, .pf = .1)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease30_Conf0_Violating.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

When sequential ignorability breaks down and there were strong confounding effects

plt <- plot.efdr(.p1 = .1, .p0 = .5, .pf = .1)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease10_Conf50_Violating.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF

plt <- plot.efdr(.p1 = .3, .p0 = .5, .pf = .1)
print(plt)

.file <- fig.dir %&% "/Fig_FDR_Disease30_Conf50_Violating.pdf"
.gg.save(.file, plt, width = 8, height = 5)

PDF