.read.named.v5 <- function(x) {
xx <-
str_remove(x, "sim_v5_") %>%
str_remove_all("[NMS]") %>%
str_split("[_B]") %>%
unlist %>%
as.integer
names(xx) <- c("N","M","S","B")
ret <- .read.v5(x) %>% as.data.table
ret[, N := xx[1]]
ret[, M := xx[2]]
ret[, S := xx[3]]
ret[, B := xx[4]]
return(ret)
}
.file <- ".simulation.v5.rdata"
if(!file.exists(.file)) {
result.dt <- .read.named.v5("sim_v5_N40_M50_S5B0")
save(list="result.dt", file=.file)
} else {
load(.file)
}
result.dt[, knn := factor(knn, c(1,10,50,100,200))]
result.dt[, p1.lab := paste("Var. by Causal ", p1*100, "%")]
result.dt[, p0.lab := paste("Var. by Confounder", p0*100, "%")]
.aes <- aes(x=as.factor(p1*100),
y=auprc,
fill=knn)
plt <-
.ggplot(result.dt, .aes) +
facet_grid(~p0.lab) +
geom_boxplot(size=.3, outlier.size=0, outlier.stroke=0) +
scale_fill_brewer("# neighbours") +
ylab("AUPRC") +
xlab("Variance Caused by Disease Effect (%)")
print(plt)
.file <- fig.dir %&% "/Fig_knn_auprc.pdf"
.gg.save(.file, plt, width=6, height=3)
We sought to control false discovery rates without looking at actual causal gene labels using John Storey’s empirical FDR calibration method implemented in qvalue
package.
qvalue
package.aes <- aes(x=as.factor(p1*100),
y=edr01,
fill=knn)
plt <-
.ggplot(result.dt, .aes) +
facet_grid(~p0.lab) +
geom_boxplot(size=.3, outlier.size=0, outlier.stroke=0) +
scale_y_continuous(minor_breaks = c(), breaks = c(1e-2, 5e-2, (1:9)/10)) +
geom_hline(yintercept = .01, lty = 2, colour = 2, size = .5) +
scale_fill_brewer("# neighbours") +
ylab("Discovery Rate (out of 10k genes)") +
xlab("Variance Caused by Disease Effect (%)")
print(plt)
.file <- fig.dir %&% "/Fig_knn_edr01.pdf"
.gg.save(.file, plt, width=6, height=3)
qvalue
package.aes <- aes(x=as.factor(p1*100),
y=edr05,
fill=knn)
plt <-
.ggplot(result.dt, .aes) +
facet_grid(~p0.lab) +
geom_boxplot(size=.3, outlier.size=0, outlier.stroke=0) +
scale_y_continuous(minor_breaks = c(), breaks = c(1e-2, 5e-2, (1:9)/10)) +
geom_hline(yintercept = .05, lty = 2, colour = 2, size = .5) +
scale_fill_brewer("# neighbours") +
ylab("Discovery Rate (out of 10k genes)") +
xlab("Variance Caused by Disease Effect (%)")
print(plt)
.file <- fig.dir %&% "/Fig_knn_edr05.pdf"
.gg.save(.file, plt, width=6, height=3)
qvalue
package.aes <- aes(x=as.factor(p1*100),
y=edr10,
fill=knn)
plt <-
.ggplot(result.dt, .aes) +
facet_grid(~p0.lab) +
geom_boxplot(size=.3, outlier.size=0, outlier.stroke=0) +
scale_y_continuous(minor_breaks = c(), breaks = c(1e-2, 5e-2, (1:9)/10)) +
geom_hline(yintercept = .10, lty = 2, colour = 2, size = .5) +
scale_fill_brewer("# neighbours") +
ylab("Discovery Rate (out of 10k genes)") +
xlab("Variance Caused by Disease Effect (%)")
print(plt)
.file <- fig.dir %&% "/Fig_knn_edr10.pdf"
.gg.save(.file, plt, width=6, height=3)
qvalue
package.aes <- aes(x=as.factor(p1*100),
y=efdr01,
fill=knn)
plt <-
.ggplot(result.dt, .aes) +
facet_grid(~p0.lab) +
geom_boxplot(size=.3, outlier.size=0, outlier.stroke=0) +
scale_y_continuous(minor_breaks = c(), breaks = c(1e-2, 5e-2, (1:9)/10)) +
geom_hline(yintercept = .01, lty = 2, colour = 2, size = .5) +
scale_fill_brewer("# neighbours") +
ylab("empirical False Discovery Rate") +
xlab("Variance Caused by Disease Effect (%)")
print(plt)
.file <- fig.dir %&% "/Fig_knn_efdr01.pdf"
.gg.save(.file, plt, width=6, height=3)
qvalue
package.aes <- aes(x=as.factor(p1*100),
y=efdr05,
fill=knn)
plt <-
.ggplot(result.dt, .aes) +
facet_grid(~p0.lab) +
geom_boxplot(size=.3, outlier.size=0, outlier.stroke=0) +
scale_y_continuous(minor_breaks = c(), breaks = c(1e-2, 5e-2, (1:9)/10)) +
geom_hline(yintercept = .05, lty = 2, colour = 2, size = .5) +
scale_fill_brewer("# neighbours") +
ylab("empirical False Discovery Rate") +
xlab("Variance Caused by Disease Effect (%)")
print(plt)
.file <- fig.dir %&% "/Fig_knn_efdr05.pdf"
.gg.save(.file, plt, width=6, height=3)
qvalue
package.aes <- aes(x=as.factor(p1*100),
y=efdr10,
fill=knn)
plt <-
.ggplot(result.dt, .aes) +
facet_grid(~p0.lab) +
geom_boxplot(size=.3, outlier.size=0, outlier.stroke=0) +
scale_y_continuous(minor_breaks = c(), breaks = c(1e-2, 5e-2, (1:9)/10)) +
geom_hline(yintercept = .1, lty = 2, colour = 2, size = .5) +
scale_fill_brewer("# neighbours") +
ylab("empirical False Discovery Rate") +
xlab("Variance Caused by Disease Effect (%)")
print(plt)
.file <- fig.dir %&% "/Fig_knn_efdr10.pdf"
.gg.save(.file, plt, width=6, height=3)