Out of 21500 test we’d expect around 0.05*21500
to
be significant by chance.
(and 3, 5 and 6)
library("MESS")
data(superroot2)
pval <- by(superroot2,
superroot2$gene,
FUN=function(dat) {
anova(lm(log(signal) ~ array + color + plant,
data=dat))[3,5]} )
library("ggplot2")
library("tidyverse")
DF <- data.frame(pval=as.vector(pval))
DF %>% ggplot(aes(x=pval)) + geom_histogram(col="lightblue")
# Remember that some are missing
p1 <- p.adjust(as.vector(pval), method="bonferroni")
p2 <- p.adjust(as.vector(pval), method="holm")
p3 <- p.adjust(as.vector(pval), method="BH") # FDR
head(sort(pval))
## superroot2$gene
## AT3G59930.1 AT2G39350.1 AT3G60140.1 AT4G19030.1 AT4G31500.1 AT5G13910.1
## 1.048266e-05 1.781166e-05 2.880092e-05 3.298942e-05 3.410671e-05 3.438769e-05
head(sort(p1))
## [1] 0.2253667 0.3829328 0.6191910 0.7092394 0.7332602 0.7393009
head(sort(p2))
## [1] 0.2253667 0.3829150 0.6191334 0.7091405 0.7331238 0.7391290
head(sort(p3))
## [1] 0.1075944 0.1075944 0.1075944 0.1075944 0.1075944 0.1075944
Note that one of the tests resulted in a NA
so there
are actually only 21499 tests undertaken. If this number is used as the
denominator then the numbers match.
Due to the nature of the BH approach. If an earlier p-value had a lower adjusted fdr-value then that value is used for lower ranked adjusted fdr values.
library("glmnet")
load(url("http://www.biostatistics.dk/teaching/advtopicsA/data/lassodata.rda"))
m1 <- glmnet(genotype, phenotype)
Ensure that the penalty is the same across predictors such that it does not depend on scale.
m2 <- cv.glmnet(genotype, phenotype)
plot(m2)
lambda <- m2$lambda.1se
ests <- coef(m1, s=lambda)
ests[ests != 0]
## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient
## [1] 6.155399e-01 7.945800e-02 -1.511681e-01 4.578236e-14 -2.143880e-02
## [6] 1.214096e-01 -1.767616e-02 1.371258e-01 1.326876e-01 1.127302e-03
m3 <- glmnet(genotype, phenotype, family="binomial")
m4 <- cv.glmnet(genotype, phenotype, family="binomial")
plot(m3)
plot(m4)
ests2 <- coef(m3, s=m4$lambda.1se)
ests2[ests2 != 0]
## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient
## [1] 6.062753e-01 3.685309e-01 -7.083144e-01 7.122772e-14 -1.027700e-01
## [6] 5.261112e-01 -8.621539e-02 6.335221e-01 5.779711e-01 7.274392e-03
m5 <- glmnet(genotype, phenotype, alpha=0, family="binomial")
m6 <- cv.glmnet(genotype, phenotype, alpha=0, family="binomial")
plot(m5)
plot(m6)
ests2 <- coef(m5, s=m6$lambda.1se) # Gets them all since none are zero
m7 <- glmnet(genotype, phenotype, alpha=.67, family="binomial")
m8 <- cv.glmnet(genotype, phenotype, alpha=.67, family="binomial")
plot(m7)
plot(m8)
ests2 <- coef(m7, s=m8$lambda.1se)
ests2[ests2 != 0]
## <sparse>[ <logic> ]: .M.sub.i.logical() maybe inefficient
## [1] 0.380817195 0.276653411 -0.298172779 0.270867271 -0.076465628
## [6] -0.006182403 0.169990524 0.041253335 0.040144873 0.006350749
## [11] 0.038004138 -0.024572992 -0.023649148 0.095457931 -0.021923444
## [16] -0.021274718 -0.064163458 0.502432620 0.141833575 0.059611682
## [21] -0.058574656 -0.057528482 0.121862616 0.056408785 -0.055123872
Claus Ekstrøm 2023