# Biostatistics and Bioinformatics Branch (BBB)

## A min–max combination of biomarkers to improve diagnostic accuracy

### MinMaxROC-Erratum-R-Codesl.R

#MinMaxROC-Erratum-R-Codesl.txt
###################################
#Install R 2.13 and the package mvtnorm
#########################################
library(mvtnorm)
###############################################
####Run the following functions
####Need to define Xmin, Xmax, Ymin, Ymax
#####before calling minimax
##########################################
#function for minmax combination data
#xx=case data yy=control data
minmax<-function(para){
xx<-Xmax+para*Xmin
yy<-Ymax+para*Ymin
a<-sum(wilcox.test(xx, yy)\$statistic)/nx
auc<-a/ny
return(auc)
}
#function for Nonparametric linear combination
#X-case Y-Control
Npcom<-function(para, X, Y){
a<-matrix(c(1,para), byrow=T, ncol=1)
xx<-as.vector(X%*%a)
yy<-as.vector(Y%*%a)
b<-sum(wilcox.test(yy, xx)\$statistic)/nx
#use xx after yy because optim minimizes
bb<-b/ny
return(bb)
}
#function for empirical sensitivity
#x-case y-control
sen<-function(x,y, FPF){
qy<-quantile(y,1-FPF)
m<-length(FPF)
TPF<-0
for (i in 1:m){
TPF[i]<-mean(x>qy[i])
}
TPF}
#################################################
####Normal data that give Figure 1 in Section 4.1
################################################
p<-5
muX<-matrix(c(0.5, 0.6, 0.7, 0.8, 1),ncol=p,byrow=T)
muY<-matrix(rep(0,p),ncol=p,byrow=T)
SigmaX<-matrix(c(1.0,0.0,0.0,0.0,-1.0,
0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,
0.0,0.0,0.0,0.5,0.0,-1.0,0.0,0.0,0.0,2.5), byrow=T, ncol=5)
SigmaY<-SigmaX
nx<-50000
ny<-50000
X<-rmvnorm(nx, mean=muX, sigma=SigmaX)
Y<-rmvnorm(ny, mean=muY, sigma=SigmaY)
###define data for cases and data for controls
cntr<-Y
case<-X
#obtain max and min data
Xmin<-0
Xmax<-0
Ymin<-0
Ymax<-0
for (i in 1:nx){
Xmin[i]<-min(case[i,])
Xmax[i]<-max(case[i,])}
for (i in 1:ny){
Ymin[i]<-min(cntr[i,])
Ymax[i]<-max(cntr[i,])}
###############################
#Solutions
###############################
################################
#ROC curves and areas are empirical based
#on data, not on the theoretical distribution
###############################
###minimax solution
a<-optimize(minmax, lower=-500, upper=500, maximum=TRUE)
c1<-a\$maximum
AUC1<-a\$objective
c(c1, AUC1)
### Empirical linear combination solution
a<-optim(rep(0,p-1), Npcom, X=case, Y=cntr, hessian=T,
control=list(maxit=20000))
c2<-a\$par
AUC2<-1-a\$value
c(c2, AUC2)
###Normal linear combination
n<-dim(case)[1]

m<-dim(cntr)[1]
p<-dim(case)[2]
cx<-0
for (i in 1:p)
{cx[i]<-mean(case[,i])}
cy<-0
for (i in 1:p)
{cy[i]<-mean(cntr[,i])}
muX<-matrix(cx,ncol=p,byrow=T)
muY<-matrix(cy,ncol=p,byrow=T)
SigmaX<-var(case,na.rm=T)
SigmaY<-var(cntr,na.rm=T)
#best linear combinations
dmu<-muX-muY
Sigma<-SigmaX+SigmaY
c3<-solve(Sigma)%*%t(dmu)
AUC3<-pnorm(sqrt(dmu%*%c3))
c(c3, AUC3)
##################################
#Making figure
FPF<-seq(0,1,0.001)
###minimax
xx<-Xmax+c1*Xmin
yy<-Ymax+c1*Ymin
TPF1<-sen(xx,yy,FPF)
### empirical linear comb
a<-matrix(c(1,c2), byrow=T, ncol=1)
xx<-as.vector(case%*%a)
yy<-as.vector(cntr%*%a)
TPF2<-sen(xx,yy,FPF)
#### empirical normal linear
xx<-as.vector(case%*%c3)
yy<-as.vector(cntr%*%c3)
TPF3<-sen(xx,yy,FPF)
yl<-min(TPF1, TPF2, TPF3)
yu<-max(TPF1, TPF2, TPF3)
plot(FPF, TPF1, lty=1, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF", ylab="TPF")
par(new=T)
plot(FPF, TPF2, lty=2, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF", ylab="TPF")
par(new=T)
plot(FPF, TPF3, lty=3, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",ylab="TPF")
#######################################################
#######################################################
#######################################################
#############################
###Generate Data for other figures
###############################
###################################################
###################################################
#Below yields figure 2 in Section 4.2
### Normal with unequal variance
####################################################
####################################################
###############################
p<-5
muX<-matrix(c(0.5, 0.6, 0.7, 0.8, 1),ncol=p,byrow=T)
muY<-matrix(rep(0,p),ncol=p,byrow=T)
SigmaX<-matrix(c(1.0,0,0,0, -1.0,0,0.5, 0, 0,0, 0,0, 0.5,0, 0,
0,0,0,0.5, 0, -1.0, 0,0,0, 2.5), byrow=T, ncol=5)
SigmaY<-matrix(c(1,0,0.0,0,0.0,0,2,0.0,0,0.0,0,0,1.5,0,0.0,
0,0,0.0,3,0.0,0,0,0.0,0, 2.5), byrow=T, ncol=5)
nx<-50000
ny<-50000
X<-rmvnorm(nx, mean=muX, sigma=SigmaX)
Y<-rmvnorm(ny, mean=muY, sigma=SigmaY)
###define data for cases and data for controls
cntr<-Y
case<-X
#obtain max and min data
Xmin<-0
Xmax<-0
Ymin<-0
Ymax<-0

for (i in 1:nx){
Xmin[i]<-min(case[i,])
Xmax[i]<-max(case[i,])}
for (i in 1:ny){
Ymin[i]<-min(cntr[i,])
Ymax[i]<-max(cntr[i,])}
###minimax solution
a<-optimize(minmax, lower=-500, upper=500, maximum=TRUE)
c1<-a\$maximum
AUC1<-a\$objective
c(c1, AUC1)
### Empirical linear combination solution
a<-optim(rep(0,p-1), Npcom, X=case, Y=cntr, hessian=T,
control=list(maxit=20000))
c2<-a\$par
AUC2<-1-a\$value
c(c2, AUC2)
###Normal linear combination
n<-dim(case)[1]
m<-dim(cntr)[1]
p<-dim(case)[2]
cx<-0
for (i in 1:p)
{cx[i]<-mean(case[,i])}
cy<-0
for (i in 1:p)
{cy[i]<-mean(cntr[,i])}
muX<-matrix(cx,ncol=p,byrow=T)
muY<-matrix(cy,ncol=p,byrow=T)
SigmaX<-var(case,na.rm=T)
SigmaY<-var(cntr,na.rm=T)
#best linear combinations
dmu<-muX-muY
Sigma<-SigmaX+SigmaY
c3<-solve(Sigma)%*%t(dmu)
AUC3<-pnorm(sqrt(dmu%*%c3))
c(c3, AUC3)
##################################
#Making figure
FPF<-seq(0,1,0.001)
###minimax
xx<-Xmax+c1*Xmin
yy<-Ymax+c1*Ymin

TPF1<-sen(xx,yy,FPF)
### empirical linear comb
a<-matrix(c(1,c2), byrow=T, ncol=1)
xx<-as.vector(case%*%a)
yy<-as.vector(cntr%*%a)
TPF2<-sen(xx,yy,FPF)
#### empirical normal linear
xx<-as.vector(case%*%c3)
yy<-as.vector(cntr%*%c3)
TPF3<-sen(xx,yy,FPF)
yl<-min(TPF1, TPF2, TPF3)
yu<-max(TPF1, TPF2, TPF3)
plot(FPF, TPF1, lty=1, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
par(new=T)
plot(FPF, TPF2, lty=2, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
par(new=T)
plot(FPF, TPF3, lty=3, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
###################################################
###################################################
#Below is figure 3 in Section 4.3
####################################################
####################################################
### Log-Normal with unequal variance
##################################
p<-5
muX<-matrix(c(0.5, 0.6, 0.7, 0.8, 1),ncol=p,byrow=T)
muY<-matrix(rep(0,p),ncol=p,byrow=T)
SigmaX<-matrix(c(1.0,0,0,0, -1.0,0,0.5, 0, 0,0, 0,0, 0.5,0, 0,
0,0,0,0.5, 0, -1.0, 0,0,0, 2.5), byrow=T, ncol=5)
SigmaY<-matrix(c(1,0,0.0,0,0.0,0,2,0.0,0,0.0,0,0,1.5,0,0.0,

0,0,0.0,3,0.0,0,0,0.0,0, 2.5), byrow=T, ncol=5)
nx<-50000
ny<-50000
X<-rmvnorm(nx, mean=muX, sigma=SigmaX)
Y<-rmvnorm(ny, mean=muY, sigma=SigmaY)
cntr<-exp(Y)
case<-exp(X)
#obtain max and min data
Xmin<-0
Xmax<-0
Ymin<-0
Ymax<-0
for (i in 1:nx){
Xmin[i]<-min(case[i,])
Xmax[i]<-max(case[i,])}
for (i in 1:ny){
Ymin[i]<-min(cntr[i,])
Ymax[i]<-max(cntr[i,])}
###minimax solution
a<-optimize(minmax, lower=-500, upper=500, maximum=TRUE)
c1<-a\$maximum
AUC1<-a\$objective
c(c1, AUC1)
### Empirical linear combination solution
a<-optim(rep(0,p-1), Npcom, X=case, Y=cntr, hessian=T,
control=list(maxit=20000))
c2<-a\$par
AUC2<-1-a\$value
c(c2, AUC2)
###Normal linear combination
n<-dim(case)[1]
m<-dim(cntr)[1]
p<-dim(case)[2]
cx<-0
for (i in 1:p)
{cx[i]<-mean(case[,i])}
cy<-0
for (i in 1:p)
{cy[i]<-mean(cntr[,i])}
muX<-matrix(cx,ncol=p,byrow=T)
muY<-matrix(cy,ncol=p,byrow=T)
SigmaX<-var(case,na.rm=T)
SigmaY<-var(cntr,na.rm=T)

#best linear combinations
dmu<-muX-muY
Sigma<-SigmaX+SigmaY
c3<-solve(Sigma)%*%t(dmu)
AUC3<-pnorm(sqrt(dmu%*%c3))
c(c3, AUC3)
##################################
#Making figure
FPF<-seq(0,1,0.001)
###minimax
xx<-Xmax+c1*Xmin
yy<-Ymax+c1*Ymin
TPF1<-sen(xx,yy,FPF)
### empirical linear comb
a<-matrix(c(1,c2), byrow=T, ncol=1)
xx<-as.vector(case%*%a)
yy<-as.vector(cntr%*%a)
TPF2<-sen(xx,yy,FPF)
#### empirical normal linear
xx<-as.vector(case%*%c3)
yy<-as.vector(cntr%*%c3)
TPF3<-sen(xx,yy,FPF)
yl<-min(TPF1, TPF2, TPF3)
yu<-max(TPF1, TPF2, TPF3)
plot(FPF, TPF1, lty=1, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
par(new=T)
plot(FPF, TPF2, lty=2, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
par(new=T)
plot(FPF, TPF3, lty=3, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
###################################################
###################################################

#Below is figure 4 in Section 4.4
### Lognormal-Gamma with unequal variance
####################################################
####################################################
p<-5
muX<-matrix(c(0.5, 0.6, 0.7, 0.8, 1),ncol=p,byrow=T)
muY<-matrix(rep(0,p),ncol=p,byrow=T)
SigmaX<-matrix(c(1.0,0,0,0, -1.0,0,0.5, 0, 0,0, 0,0, 0.5,0, 0,
0,0,0,0.5, 0, -1.0, 0,0,0, 2.5), byrow=T, ncol=5)
SigmaY<-matrix(c(1,0,0.0,0,0.0,0,2,0.0,0,0.0,0,0,1.5,0,0.0,
0,0,0.0,3,0.0,0,0,0.0,0, 2.5), byrow=T, ncol=5)
nx<-50000
ny<-50000
X<-rmvnorm(nx, mean=muX, sigma=SigmaX)
Y<-rmvnorm(ny, mean=muY, sigma=SigmaY)
case<-rep(0, nx*p)
dim(case)<-c(nx,p)
cntr<-rep(0, ny*p)
dim(cntr)<-c(ny,p)
XX<-exp(X)
YY<-exp(Y)
case[,1]<-XX[,1]+rgamma(nx, shape=0.1)
case[,2]<-XX[,2]+rgamma(nx, shape=0.1)
case[,3]<-XX[,3]+rgamma(nx, shape=0.1)
case[,4]<-XX[,4]+rgamma(nx, shape=0.1)
case[,5]<-XX[,5]+rgamma(nx, shape=0.1)
cntr[,1]<-YY[,1]+rgamma(ny, shape=0.1)
cntr[,2]<-YY[,2]+rgamma(ny, shape=0.2)
cntr[,3]<-YY[,3]+rgamma(ny, shape=0.3)
cntr[,4]<-YY[,4]+rgamma(ny, shape=0.4)
cntr[,5]<-YY[,5]+rgamma(ny, shape=0.5)
#obtain max and min data
Xmin<-0
Xmax<-0
Ymin<-0
Ymax<-0
for (i in 1:nx){
Xmin[i]<-min(case[i,])
Xmax[i]<-max(case[i,])}
for (i in 1:ny){
Ymin[i]<-min(cntr[i,])
Ymax[i]<-max(cntr[i,])}

########################################
#############################################
###minimax solution
a<-optimize(minmax, lower=-500, upper=500, maximum=TRUE)
c1<-a\$maximum
AUC1<-a\$objective
c(c1, AUC1)
#c1 AUC1=1.1960666 0.8608711
### Empirical linear combination solution
a<-optim(rep(0,p-1), Npcom, X=case, Y=cntr, hessian=T,
control=list(maxit=20000))
c2<-a\$par
AUC2<-1-a\$value
c(c2, AUC2)
#=0.8040231 0.9470411 1.0838207 0.6650017 0.9386253
###Normal linear combination
n<-dim(case)[1]
m<-dim(cntr)[1]
p<-dim(case)[2]
cx<-0
for (i in 1:p)
{cx[i]<-mean(case[,i])}
cy<-0
for (i in 1:p)
{cy[i]<-mean(cntr[,i])}
muX<-matrix(cx,ncol=p,byrow=T)
muY<-matrix(cy,ncol=p,byrow=T)
SigmaX<-var(case,na.rm=T)
SigmaY<-var(cntr,na.rm=T)
#best linear combinations
dmu<-muX-muY
Sigma<-SigmaX+SigmaY
c3<-solve(Sigma)%*%t(dmu)
AUC3<-pnorm(sqrt(dmu%*%c3))
c(c3, AUC3)
#=0.7472859 0.6011276 0.7124475 0.8107789 0.4964150 0.9386739
##################################
#Making Figure
FPF<-seq(0,1,0.001)
###ROC Plots
###minimax
xx<-Xmax+c1*Xmin
yy<-Ymax+c1*Ymin
TPF1<-sen(xx,yy,FPF)

### empirical linear comb
a<-matrix(c(1,c2), byrow=T, ncol=1)
xx<-as.vector(case%*%a)
yy<-as.vector(cntr%*%a)
TPF2<-sen(xx,yy,FPF)
#### empirical normal linear
xx<-as.vector(case%*%c3)
yy<-as.vector(cntr%*%c3)
TPF3<-sen(xx,yy,FPF)
yl<-min(TPF1, TPF2, TPF3)
yu<-max(TPF1, TPF2, TPF3)
plot(FPF, TPF1, lty=1, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
par(new=T)
plot(FPF, TPF2, lty=2, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")
par(new=T)
plot(FPF, TPF3, lty=3, type="l", xlim<-c(0,1), ylim<-c(yl, yu),
xlab="FPF",
ylab="TPF")