Because of a lapse in government funding, the information on this website may not be up to date, transactions submitted via the website may not be processed, and the agency may not be able to respond to inquiries until appropriations are enacted.

The NIH Clinical Center (the research hospital of NIH) is open. For more details about its operating status, please visit cc.nih.gov.

Updates regarding government operating status and resumption of normal operations can be found at OPM.gov.

Biostatistics and Bioinformatics Branch (BBB)

A min–max combination of biomarkers to improve diagnostic accuracy

MinMaxROC-Erratum-R-Codesl.R

#MinMaxROC-Erratum-R-Codesl.txt
###################################
#Install R 2.13 and the package mvtnorm
#########################################
library(mvtnorm)
###############################################
####Run the following functions
####Need to define Xmin, Xmax, Ymin, Ymax
#####before calling minimax
##########################################
#function for minmax combination data
#xx=case data yy=control data
minmax<-function(para){
xx<-Xmax+para*Xmin
yy<-Ymax+para*Ymin
a<-sum(wilcox.test(xx, yy)$statistic)/nx
auc<-a/ny
return(auc)
}
#function for Nonparametric linear combination
#X-case Y-Control
Npcom<-function(para, X, Y){
a<-matrix(c(1,para), byrow=T, ncol=1)
xx<-as.vector(X%*%a)
yy<-as.vector(Y%*%a)
b<-sum(wilcox.test(yy, xx)$statistic)/nx
#use xx after yy because optim minimizes
bb<-b/ny
return(bb)
}
#function for empirical sensitivity
#x-case y-control
sen<-function(x,y, FPF){
qy<-quantile(y,1-FPF)
m<-length(FPF)
TPF<-0
for (i in 1:m){
TPF[i]<-mean(x>qy[i])
}
TPF}
#################################################
####Normal data that give Figure 1 in Section 4.1
################################################
p<-5
muX<-matrix(c(0.5, 0.6, 0.7, 0.8, 1),ncol=p,byrow=T)
muY<-matrix(rep(0,p),ncol=p,byrow=T)
SigmaX<-matrix(c(1.0,0.0,0.0,0.0,-1.0,
0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,
0.0,0.0,0.0,0.5,0.0,-1.0,0.0,0.0,0.0,2.5), byrow=T, ncol=5)
SigmaY<-SigmaX
nx<-50000
ny<-50000
X<-rmvnorm(nx, mean=muX, sigma=SigmaX)
Y<-rmvnorm(ny, mean=muY, sigma=SigmaY)
###define data for cases and data for controls
cntr<-Y
case<-X
#obtain max and min data
Xmin<-0
Xmax<-0
Ymin<-0
Ymax<-0
for (i in 1:nx){
Xmin[i]<-min(case[i,])
Xmax[i]<-max(case[i,])}
for (i in 1:ny){
Ymin[i]<-min(cntr[i,])
Ymax[i]<-max(cntr[i,])}
###############################
#Solutions
###############################
################################
#ROC curves and areas are empirical based
#on data, not on the theoretical distribution
###############################
###minimax solution
a<-optimize(minmax, lower=-500, upper=500, maximum=TRUE)
c1<-a$maximum
AUC1<-a$objective
c(c1, AUC1)
### Empirical linear combination solution
a<-optim(rep(0,p-1), Npcom, X=case, Y=cntr, hessian=T,
control=list(maxit=20000))
c2<-a$par
AUC2<-1-a$value
c(c2, AUC2)
###Normal linear combination
n<-dim(case)[1]

m<-dim(cntr)[1]
p<-dim(case)[2]
cx<-0
for (i in 1:p)
{cx[i]<-mean(case[,i])}
cy<-0
for (i in 1:p)
{cy[i]<-mean(cntr[,i])}
muX<-matrix(cx,ncol=p,byrow=T)
muY<-matrix(cy,ncol=p,byrow=T)
SigmaX<-var(case,na.rm=T)
SigmaY<-var(cntr,na.rm=T)
#best linear combinations
dmu<-muX-muY
Sigma<-SigmaX+SigmaY
c3<-solve(Sigma)%*%t(dmu)
AUC3<-pnorm(sqrt(dmu%*%c3))
c(c3, AUC3)
##################################
#Making figure
FPF<-seq(0,1,0.001)
###minimax
xx<-Xmax+c1*Xmin
yy<-Ymax+c1*Ymin
TPF1<-sen(xx,yy,FPF)
### empirical linear comb
a<-matrix(c(1,c2), byrow=T, ncol=1)
xx<-as.vector(case%*%a)
yy<-as.vector(cntr%*%a)
TPF2<-sen(xx,yy,FPF)
#### empirical normal linear
xx<-as.vector(case%*%c3)
yy<-as.vector(cntr%*%c3)
TPF3<-sen(xx,yy,FPF)
yl<-min(TPF1, TPF2, TPF3)
yu<-max(TPF1, TPF2, TPF3)
plot(FPF, TPF1, lty=1, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF", ylab="TPF")
par(new=T)
plot(FPF, TPF2, lty=2, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF", ylab="TPF")
par(new=T)
plot(FPF, TPF3, lty=3, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",ylab="TPF")
#######################################################
#######################################################
#######################################################
#############################
###Generate Data for other figures
###############################
###################################################
###################################################
#Below yields figure 2 in Section 4.2
### Normal with unequal variance
####################################################
####################################################
###############################
p<-5
muX<-matrix(c(0.5, 0.6, 0.7, 0.8, 1),ncol=p,byrow=T)
muY<-matrix(rep(0,p),ncol=p,byrow=T)
SigmaX<-matrix(c(1.0,0,0,0, -1.0,0,0.5, 0, 0,0, 0,0, 0.5,0, 0,
0,0,0,0.5, 0, -1.0, 0,0,0, 2.5), byrow=T, ncol=5)
SigmaY<-matrix(c(1,0,0.0,0,0.0,0,2,0.0,0,0.0,0,0,1.5,0,0.0,
0,0,0.0,3,0.0,0,0,0.0,0, 2.5), byrow=T, ncol=5)
nx<-50000
ny<-50000
X<-rmvnorm(nx, mean=muX, sigma=SigmaX)
Y<-rmvnorm(ny, mean=muY, sigma=SigmaY)
###define data for cases and data for controls
cntr<-Y
case<-X
#obtain max and min data
Xmin<-0
Xmax<-0
Ymin<-0
Ymax<-0

for (i in 1:nx){
Xmin[i]<-min(case[i,])
Xmax[i]<-max(case[i,])}
for (i in 1:ny){
Ymin[i]<-min(cntr[i,])
Ymax[i]<-max(cntr[i,])}
###minimax solution
a<-optimize(minmax, lower=-500, upper=500, maximum=TRUE)
c1<-a$maximum
AUC1<-a$objective
c(c1, AUC1)
### Empirical linear combination solution
a<-optim(rep(0,p-1), Npcom, X=case, Y=cntr, hessian=T,
control=list(maxit=20000))
c2<-a$par
AUC2<-1-a$value
c(c2, AUC2)
###Normal linear combination
n<-dim(case)[1]
m<-dim(cntr)[1]
p<-dim(case)[2]
cx<-0
for (i in 1:p)
{cx[i]<-mean(case[,i])}
cy<-0
for (i in 1:p)
{cy[i]<-mean(cntr[,i])}
muX<-matrix(cx,ncol=p,byrow=T)
muY<-matrix(cy,ncol=p,byrow=T)
SigmaX<-var(case,na.rm=T)
SigmaY<-var(cntr,na.rm=T)
#best linear combinations
dmu<-muX-muY
Sigma<-SigmaX+SigmaY
c3<-solve(Sigma)%*%t(dmu)
AUC3<-pnorm(sqrt(dmu%*%c3))
c(c3, AUC3)
##################################
#Making figure
FPF<-seq(0,1,0.001)
###minimax
xx<-Xmax+c1*Xmin
yy<-Ymax+c1*Ymin

TPF1<-sen(xx,yy,FPF)
### empirical linear comb
a<-matrix(c(1,c2), byrow=T, ncol=1)
xx<-as.vector(case%*%a)
yy<-as.vector(cntr%*%a)
TPF2<-sen(xx,yy,FPF)
#### empirical normal linear
xx<-as.vector(case%*%c3)
yy<-as.vector(cntr%*%c3)
TPF3<-sen(xx,yy,FPF)
yl<-min(TPF1, TPF2, TPF3)
yu<-max(TPF1, TPF2, TPF3)
plot(FPF, TPF1, lty=1, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
par(new=T)
plot(FPF, TPF2, lty=2, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
par(new=T)
plot(FPF, TPF3, lty=3, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
###################################################
###################################################
#Below is figure 3 in Section 4.3
####################################################
####################################################
### Log-Normal with unequal variance
##################################
p<-5
muX<-matrix(c(0.5, 0.6, 0.7, 0.8, 1),ncol=p,byrow=T)
muY<-matrix(rep(0,p),ncol=p,byrow=T)
SigmaX<-matrix(c(1.0,0,0,0, -1.0,0,0.5, 0, 0,0, 0,0, 0.5,0, 0,
0,0,0,0.5, 0, -1.0, 0,0,0, 2.5), byrow=T, ncol=5)
SigmaY<-matrix(c(1,0,0.0,0,0.0,0,2,0.0,0,0.0,0,0,1.5,0,0.0,

0,0,0.0,3,0.0,0,0,0.0,0, 2.5), byrow=T, ncol=5)
nx<-50000
ny<-50000
X<-rmvnorm(nx, mean=muX, sigma=SigmaX)
Y<-rmvnorm(ny, mean=muY, sigma=SigmaY)
cntr<-exp(Y)
case<-exp(X)
#obtain max and min data
Xmin<-0
Xmax<-0
Ymin<-0
Ymax<-0
for (i in 1:nx){
Xmin[i]<-min(case[i,])
Xmax[i]<-max(case[i,])}
for (i in 1:ny){
Ymin[i]<-min(cntr[i,])
Ymax[i]<-max(cntr[i,])}
###minimax solution
a<-optimize(minmax, lower=-500, upper=500, maximum=TRUE)
c1<-a$maximum
AUC1<-a$objective
c(c1, AUC1)
### Empirical linear combination solution
a<-optim(rep(0,p-1), Npcom, X=case, Y=cntr, hessian=T,
control=list(maxit=20000))
c2<-a$par
AUC2<-1-a$value
c(c2, AUC2)
###Normal linear combination
n<-dim(case)[1]
m<-dim(cntr)[1]
p<-dim(case)[2]
cx<-0
for (i in 1:p)
{cx[i]<-mean(case[,i])}
cy<-0
for (i in 1:p)
{cy[i]<-mean(cntr[,i])}
muX<-matrix(cx,ncol=p,byrow=T)
muY<-matrix(cy,ncol=p,byrow=T)
SigmaX<-var(case,na.rm=T)
SigmaY<-var(cntr,na.rm=T)

#best linear combinations
dmu<-muX-muY
Sigma<-SigmaX+SigmaY
c3<-solve(Sigma)%*%t(dmu)
AUC3<-pnorm(sqrt(dmu%*%c3))
c(c3, AUC3)
##################################
#Making figure
FPF<-seq(0,1,0.001)
###minimax
xx<-Xmax+c1*Xmin
yy<-Ymax+c1*Ymin
TPF1<-sen(xx,yy,FPF)
### empirical linear comb
a<-matrix(c(1,c2), byrow=T, ncol=1)
xx<-as.vector(case%*%a)
yy<-as.vector(cntr%*%a)
TPF2<-sen(xx,yy,FPF)
#### empirical normal linear
xx<-as.vector(case%*%c3)
yy<-as.vector(cntr%*%c3)
TPF3<-sen(xx,yy,FPF)
yl<-min(TPF1, TPF2, TPF3)
yu<-max(TPF1, TPF2, TPF3)
plot(FPF, TPF1, lty=1, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
par(new=T)
plot(FPF, TPF2, lty=2, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
par(new=T)
plot(FPF, TPF3, lty=3, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
###################################################
###################################################

#Below is figure 4 in Section 4.4
### Lognormal-Gamma with unequal variance
####################################################
####################################################
p<-5
muX<-matrix(c(0.5, 0.6, 0.7, 0.8, 1),ncol=p,byrow=T)
muY<-matrix(rep(0,p),ncol=p,byrow=T)
SigmaX<-matrix(c(1.0,0,0,0, -1.0,0,0.5, 0, 0,0, 0,0, 0.5,0, 0,
0,0,0,0.5, 0, -1.0, 0,0,0, 2.5), byrow=T, ncol=5)
SigmaY<-matrix(c(1,0,0.0,0,0.0,0,2,0.0,0,0.0,0,0,1.5,0,0.0,
0,0,0.0,3,0.0,0,0,0.0,0, 2.5), byrow=T, ncol=5)
nx<-50000
ny<-50000
X<-rmvnorm(nx, mean=muX, sigma=SigmaX)
Y<-rmvnorm(ny, mean=muY, sigma=SigmaY)
case<-rep(0, nx*p)
dim(case)<-c(nx,p)
cntr<-rep(0, ny*p)
dim(cntr)<-c(ny,p)
XX<-exp(X)
YY<-exp(Y)
case[,1]<-XX[,1]+rgamma(nx, shape=0.1)
case[,2]<-XX[,2]+rgamma(nx, shape=0.1)
case[,3]<-XX[,3]+rgamma(nx, shape=0.1)
case[,4]<-XX[,4]+rgamma(nx, shape=0.1)
case[,5]<-XX[,5]+rgamma(nx, shape=0.1)
cntr[,1]<-YY[,1]+rgamma(ny, shape=0.1)
cntr[,2]<-YY[,2]+rgamma(ny, shape=0.2)
cntr[,3]<-YY[,3]+rgamma(ny, shape=0.3)
cntr[,4]<-YY[,4]+rgamma(ny, shape=0.4)
cntr[,5]<-YY[,5]+rgamma(ny, shape=0.5)
#obtain max and min data
Xmin<-0
Xmax<-0
Ymin<-0
Ymax<-0
for (i in 1:nx){
Xmin[i]<-min(case[i,])
Xmax[i]<-max(case[i,])}
for (i in 1:ny){
Ymin[i]<-min(cntr[i,])
Ymax[i]<-max(cntr[i,])}

########################################
#############################################
###minimax solution
a<-optimize(minmax, lower=-500, upper=500, maximum=TRUE)
c1<-a$maximum
AUC1<-a$objective
c(c1, AUC1)
#c1 AUC1=1.1960666 0.8608711
### Empirical linear combination solution
a<-optim(rep(0,p-1), Npcom, X=case, Y=cntr, hessian=T,
control=list(maxit=20000))
c2<-a$par
AUC2<-1-a$value
c(c2, AUC2)
#=0.8040231 0.9470411 1.0838207 0.6650017 0.9386253
###Normal linear combination
n<-dim(case)[1]
m<-dim(cntr)[1]
p<-dim(case)[2]
cx<-0
for (i in 1:p)
{cx[i]<-mean(case[,i])}
cy<-0
for (i in 1:p)
{cy[i]<-mean(cntr[,i])}
muX<-matrix(cx,ncol=p,byrow=T)
muY<-matrix(cy,ncol=p,byrow=T)
SigmaX<-var(case,na.rm=T)
SigmaY<-var(cntr,na.rm=T)
#best linear combinations
dmu<-muX-muY
Sigma<-SigmaX+SigmaY
c3<-solve(Sigma)%*%t(dmu)
AUC3<-pnorm(sqrt(dmu%*%c3))
c(c3, AUC3)
#=0.7472859 0.6011276 0.7124475 0.8107789 0.4964150 0.9386739
##################################
#Making Figure
FPF<-seq(0,1,0.001)
###ROC Plots
###minimax
xx<-Xmax+c1*Xmin
yy<-Ymax+c1*Ymin
TPF1<-sen(xx,yy,FPF)

### empirical linear comb
a<-matrix(c(1,c2), byrow=T, ncol=1)
xx<-as.vector(case%*%a)
yy<-as.vector(cntr%*%a)
TPF2<-sen(xx,yy,FPF)
#### empirical normal linear
xx<-as.vector(case%*%c3)
yy<-as.vector(cntr%*%c3)
TPF3<-sen(xx,yy,FPF)
yl<-min(TPF1, TPF2, TPF3)
yu<-max(TPF1, TPF2, TPF3)
plot(FPF, TPF1, lty=1, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
par(new=T)
plot(FPF, TPF2, lty=2, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
par(new=T)
plot(FPF, TPF3, lty=3, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")