Because of a lapse in government funding, the information on this website may not be up to date, transactions submitted via the website may not be processed, and the agency may not be able to respond to inquiries until appropriations are enacted.

The NIH Clinical Center (the research hospital of NIH) is open. For more details about its operating status, please visit cc.nih.gov.

Updates regarding government operating status and resumption of normal operations can be found at OPM.gov.

Marginal analysis of measurement agreement among multiple raters with non-ignorable missing

R_function_file.R

#This file contains the deleting data by both MCAR and Non-Ignorable 1

#function FLSkp computes Fleiss Kappa of given data set
#this function works for binary data only
FLSkp = function(B){
     #B is the matrix containing binary ratings (0,1) with row standing for subjects and column standing for raters
     A=B+1 #Matrix A change binary labelling from (0,1) to (1,2)
     N=nrow(A) #number of subjects
     n=ncol(A) #number of ratings per subject
     R=matrix(0,nrow=N, ncol=2)
     for (i in 1:N){
          for (j in 1:2){
               R[i,j]=sum(A[i,]==j)
          }
     }
     p=colSums(R)/(N*n)
     kp=(sum(R^2)-N*n*(1+(n-1)*sum(p^2)))/(N*n*(n-1)*(1-sum(p^2)))
     FLSvar=2*(sum(p^2)-(2*n-3)*(sum(p^2))^2+2*(n-2)*sum(p^3))/(N*n*(n-1)*((1-sum(p^2))^2))

     r=rowSums(A-1)
     pi1=mean(r/n) # R-1 changes binary rating from (1,2) back to (0,1)
     pi0=mean((n-r)/n )
     pe_pi=pi0^2+pi1^2
     pa=mean(r*(r-1)/(n*(n-1)) + (n-r)*(n-r-1)/(n*(n-1)))
     gamma_pi=(pa-pe_pi)/(1-pe_pi)
     #return(c(pe_pi,pa))


#the following method is based on Gwet (2008)
     GM=matrix(NA,nrow=N, ncol=2)
     for (i in 1:N){
          pa_i=r[i]*(r[i]-1)/(n*(n-1)) + (n-r[i])*(n-r[i]-1)/(n*(n-1))
          pe_pi_i=pi1*r[i]/n + pi0*(n-r[i])/n
          gamma_pi_i=(pa_i-pe_pi)/(1-pe_pi)
          gamma_pi_i_star=gamma_pi_i-2*(1-gamma_pi)*((pe_pi_i-pe_pi)/(1-pe_pi))
          GM[i,1]=gamma_pi_i_star
          GM[i,2]=(gamma_pi_i_star-gamma_pi)^2
     }

     VGM=sum(GM[,2])/(N*(N-1))
     return(c(kp,gamma_pi,FLSvar,VGM))
}

 

##################################################################
############################ 8 raters ############################
##################################################################

#function sim generates data
sim=function(p,r, N, p2_1,p3_1,p4_1,p5_1,p6_1,p7_1,p8_1){
     #We require r=4.
     #p is the marginal probability
     #N is the sample size
     #p2_1=P(Y2=1|Y1=1); p3_1=P(Y3=1|Y1=1)
     #r is the number of raters
     X=matrix(NA, nrow=N, ncol=r)
     for (i in 1:N){
          X[i,1]=rbinom(1,1,p)
          if (X[i,1]==1){X[i,2]=rbinom(1,1,p2_1)}
          if (X[i,1]==0){X[i,2]=rbinom(1,1,((p-p*p2_1)/(1-p)))}

          if (X[i,1]==1){X[i,3]=rbinom(1,1,p3_1)}
          if (X[i,1]==0){X[i,3]=rbinom(1,1,((p-p*p3_1)/(1-p)))}

          if (X[i,1]==1){X[i,4]=rbinom(1,1,p4_1)}
          if (X[i,1]==0){X[i,4]=rbinom(1,1,((p-p*p4_1)/(1-p)))}

          if (X[i,1]==1){X[i,5]=rbinom(1,1,p5_1)}
          if (X[i,1]==0){X[i,5]=rbinom(1,1,((p-p*p5_1)/(1-p)))}

          if (X[i,1]==1){X[i,6]=rbinom(1,1,p6_1)}
          if (X[i,1]==0){X[i,6]=rbinom(1,1,((p-p*p6_1)/(1-p)))}

          if (X[i,1]==1){X[i,7]=rbinom(1,1,p7_1)}
          if (X[i,1]==0){X[i,7]=rbinom(1,1,((p-p*p7_1)/(1-p)))}

          if (X[i,1]==1){X[i,8]=rbinom(1,1,p8_1)}
          if (X[i,1]==0){X[i,8]=rbinom(1,1,((p-p*p8_1)/(1-p)))}

     }
     #JNT stands for joint probability
     JNT12=p*p2_1
     JNT13=p*p3_1
     JNT14=p*p4_1
     JNT15=p*p5_1
     JNT16=p*p6_1
     JNT17=p*p7_1
     JNT18=p*p8_1



     #p2_10=P(Y2=1|Y1=0), p3_10=P(Y3=1|Y1=0), p4_10=P(Y4=1|Y1=0)
     p2_10=(p/(1-p))*(1-JNT12/p)
     p3_10=(p/(1-p))*(1-JNT13/p)
     p4_10=(p/(1-p))*(1-JNT14/p)
     p5_10=(p/(1-p))*(1-JNT15/p)
     p6_10=(p/(1-p))*(1-JNT16/p)
     p7_10=(p/(1-p))*(1-JNT17/p)
     p8_10=(p/(1-p))*(1-JNT18/p)


     JNT23=p2_1*p3_1*p+p2_10*p3_10*(1-p)
     JNT24=p2_1*p4_1*p+p2_10*p4_10*(1-p)
     JNT25=p2_1*p5_1*p+p2_10*p5_10*(1-p)
     JNT26=p2_1*p6_1*p+p2_10*p6_10*(1-p)
     JNT27=p2_1*p7_1*p+p2_10*p7_10*(1-p)
     JNT28=p2_1*p8_1*p+p2_10*p8_10*(1-p)

     JNT34=p3_1*p4_1*p+p3_10*p4_10*(1-p)
     JNT35=p3_1*p5_1*p+p3_10*p5_10*(1-p)
     JNT36=p3_1*p6_1*p+p3_10*p6_10*(1-p)
     JNT37=p3_1*p7_1*p+p3_10*p7_10*(1-p)
     JNT38=p3_1*p8_1*p+p3_10*p8_10*(1-p)

     JNT45=p4_1*p5_1*p+p4_10*p5_10*(1-p)
     JNT46=p4_1*p6_1*p+p4_10*p6_10*(1-p)
     JNT47=p4_1*p7_1*p+p4_10*p7_10*(1-p)
     JNT48=p4_1*p8_1*p+p4_10*p8_10*(1-p)

     JNT56=p5_1*p6_1*p+p5_10*p6_10*(1-p)
     JNT57=p5_1*p7_1*p+p5_10*p7_10*(1-p)
     JNT58=p5_1*p8_1*p+p5_10*p8_10*(1-p)

     JNT67=p6_1*p7_1*p+p6_10*p7_10*(1-p)
     JNT68=p6_1*p8_1*p+p6_10*p8_10*(1-p)

     JNT78=p7_1*p8_1*p+p7_10*p8_10*(1-p)

     cor12=(JNT12-p*p)/(p*(1-p))
     cor13=(JNT13-p*p)/(p*(1-p))
     cor23=(JNT23-p*p)/(p*(1-p))


     JNT0_12=(1-p2_10)*(1-p)
     JNT0_13=(1-p3_10)*(1-p)
     JNT0_14=(1-p4_10)*(1-p)
     JNT0_15=(1-p5_10)*(1-p)
     JNT0_16=(1-p6_10)*(1-p)
     JNT0_17=(1-p7_10)*(1-p)
     JNT0_18=(1-p8_10)*(1-p)


     JNT0_23=(1-p2_1)*(1-p3_1)*p+(1-p2_10)*(1-p3_10)*(1-p)
     JNT0_24=(1-p2_1)*(1-p4_1)*p+(1-p2_10)*(1-p4_10)*(1-p)
     JNT0_25=(1-p2_1)*(1-p5_1)*p+(1-p2_10)*(1-p5_10)*(1-p)
     JNT0_26=(1-p2_1)*(1-p6_1)*p+(1-p2_10)*(1-p6_10)*(1-p)
     JNT0_27=(1-p2_1)*(1-p7_1)*p+(1-p2_10)*(1-p7_10)*(1-p)
     JNT0_28=(1-p2_1)*(1-p8_1)*p+(1-p2_10)*(1-p8_10)*(1-p)

     JNT0_34=(1-p3_1)*(1-p4_1)*p+(1-p3_10)*(1-p4_10)*(1-p)
     JNT0_35=(1-p3_1)*(1-p5_1)*p+(1-p3_10)*(1-p5_10)*(1-p)
     JNT0_36=(1-p3_1)*(1-p6_1)*p+(1-p3_10)*(1-p6_10)*(1-p)
     JNT0_37=(1-p3_1)*(1-p7_1)*p+(1-p3_10)*(1-p7_10)*(1-p)
     JNT0_38=(1-p3_1)*(1-p8_1)*p+(1-p3_10)*(1-p8_10)*(1-p)

     JNT0_45=(1-p4_1)*(1-p5_1)*p+(1-p4_10)*(1-p5_10)*(1-p)
     JNT0_46=(1-p4_1)*(1-p6_1)*p+(1-p4_10)*(1-p6_10)*(1-p)
     JNT0_47=(1-p4_1)*(1-p7_1)*p+(1-p4_10)*(1-p7_10)*(1-p)
     JNT0_48=(1-p4_1)*(1-p8_1)*p+(1-p4_10)*(1-p8_10)*(1-p)

     JNT0_56=(1-p5_1)*(1-p6_1)*p+(1-p5_10)*(1-p6_10)*(1-p)
     JNT0_57=(1-p5_1)*(1-p7_1)*p+(1-p5_10)*(1-p7_10)*(1-p)
     JNT0_58=(1-p5_1)*(1-p8_1)*p+(1-p5_10)*(1-p8_10)*(1-p)

     JNT0_67=(1-p6_1)*(1-p7_1)*p+(1-p6_10)*(1-p7_10)*(1-p)
     JNT0_68=(1-p6_1)*(1-p8_1)*p+(1-p6_10)*(1-p8_10)*(1-p)

     JNT0_78=(1-p7_1)*(1-p8_1)*p+(1-p7_10)*(1-p8_10)*(1-p)


     FLSa= (JNT12+JNT13+JNT14+JNT15+JNT16+JNT17+JNT18+
                               JNT23+JNT24+JNT25+JNT26+JNT27+JNT28+
                                            JNT34+JNT35+JNT36+JNT37+JNT38+
                                                         JNT45+JNT46+JNT47+JNT48+
                                                                      JNT56+JNT57+JNT58+
                                                                                   JNT67+JNT68+
                                                                                                JNT78+
                  JNT0_12+JNT0_13+JNT0_14+JNT0_15+JNT0_16+JNT0_17+JNT0_18+
                                   JNT0_23+JNT0_24+JNT0_25+JNT0_26+JNT0_27+JNT0_28+
                                                    JNT0_34+JNT0_35+JNT0_36+JNT0_37+JNT0_38+
                                                                     JNT0_45+JNT0_46+JNT0_47+JNT0_48+
                                                                                      JNT0_56+JNT0_57+JNT0_58+
                                                                                                       JNT0_67+JNT0_68+
                                                                                                                         JNT0_78)

     FLSe=28*(p^2+(1-p)^2)

     FLS=(FLSa-FLSe)/((r*(r-1)/2)-FLSe)

#return(list(Data=X, EMcor12=cor(X[,1],X[,2]), EMcor13=cor(X[,1],X[,3]),EMcor23=cor(X[,2],X[,3]), cor12=cor12,cor13=cor13, cor23=cor23))

return(list(Data=X, FLS=FLS))
}

#A1=sim(p=0.7,r=8, N=100, p2_1=0.75,p3_1=0.85,p4_1=0.8,p5_1=0.9,p6_1=0.75,p7_1=0.85,p8_1=0.8) #kappa=0.2
#A1=sim(p=0.65,r=8, N=100, p2_1=0.95,p3_1=0.85,p4_1=0.8,p5_1=0.9,p6_1=0.85,p7_1=0.95,p8_1=0.89) #kappa=0.5
#A1=sim(p=0.35,r=8, N=100, p2_1=0.9,p3_1=0.95,p4_1=0.9,p5_1=0.9,p6_1=0.95,p7_1=0.95,p8_1=0.9) #kappa=0.8

 

#set.seed(1)
#A1=sim(0.7,4, 10000,0.8,0.9)

#A1$cor12
#A1$EMcor12
#A1$cor13
#A1$EMcor13
#A1$cor23
#A1$EMcor23

#colSums(A1$Data)/nrow(A1$Data)

 

#function resample does resampling from data and compute kappa
resample = function(A){
     N=nrow(A)
     R = matrix (NA, nrow=N, ncol=2)
     for (n in 1:N){
          #do the resampling on the subjects with two or more ratings
          if (ncol(A)-sum(is.na(A[n,]))>2){
               index=which(is.na(A[n,])==FALSE)
               R[n,]=A[n,sample(index,2,replace = FALSE)]
                    }
          else{R[n,]=A[n,which(is.na(A[n,])==FALSE)]}
          }

     kp=FLSkp(R) #when applying to two raters only, Fleiss Kappa simplifies to scott's pi'
     kphat=kp[1]
     kpvar=kp[4]

# tb=table(R[,1],R[,2])
# Pa=(tb[1,1]+tb[2,2])/nrow(R)
# Pe=(mean(sum(tb[1,]),sum(tb[,1]))/nrow(R))^2+(mean(sum(tb[2,]),sum(tb[,2]))/nrow(R))^2
# scott=(Pa-Pe)/(1-Pe)

     return(list(R=R,kphat=kphat,kpvar=kpvar))
}

 

resampleMN=function(A, J){
#J is the number of times of resampling
B=matrix(NA, nrow=J,ncol=2)
for (j in 1:J){
     stat=resample(A)
     B[j,]=cbind(stat$kphat,stat$kpvar)
     }

     #the formula is for var(sqrt(N)(betahat-beta)). Thus to compute var(betahat), we should NOT include sample size N.
# Sigma= mean(B[,2])
# Sigma=var(B[,1])*(J-1)/J
     Sigma=(mean(B[,2])-var(B[,1])*(J-1)/J)

return(list(B=B,kpbar=mean(B[,1]),EMvar=var(B[,1]),THvar=Sigma))
}

#A=rbind(
#cbind( 1 , 0 , 1 , NA ),
#cbind( 1 , 1 , 0 , 0 ),
#cbind( NA , 1 , 1 , NA ),
#cbind( 0 , 1 , NA , NA ),
#cbind( 0 , 0 , 0 , 0 ),
#cbind( NA , 1 , 1 , 1 ),
#cbind( 1 , NA , 0 , 0 ),
#cbind( NA , 0 , 0 , NA ),
#cbind( 1 , 1 , 1 , 1 ),
#cbind( 0 , 0 , 0 , NA ))

#function WGEE computes estimates based on weighted GEE ()
WGEE=function(A){
     N=nrow(A)
     R=ncol(A)
     CB=choose(R,2)
     S = matrix(0, nrow=N, ncol=CB)
     Y = matrix(0, nrow=N, ncol=R) #Y restores all the non missing value of A
     for (i in 1:N){
     #nonmissing is the collection of rater indicies of that subject
     nonmissing=which(is.na(A[i,])==FALSE)
     Y[i,1:length(nonmissing)]=A[i,nonmissing]/length(nonmissing)
     #index is the collection of all available rating pairs
     index=combn(nonmissing, 2)
     #for different i, j varies
     for (j in 1:ncol(index)){
          s=A[i,index[1,j]]+A[i,index[2,j]]
          S[i,j]=s*(2-s)/ncol(index)
     }

}
     po=1-mean(rowSums(S))
     pi=mean(rowSums(Y))
     pc=pi^2+(1-pi)^2
     kp=(po-pc)/(1-pc)

     #start computing the variance of kappa by CLT and delta method
     V1=rowSums(S)
     V2=rowSums(Y)
     V1bar=mean(V1)
     V2bar=mean(V2)
     Sigmahat=cov(cbind(V1,V2))
     dKdV1=-1/(2*V2bar-2*V2bar^2)
     dKdV2=V1bar*(1-2*V2bar)/(2*(V2bar^2)*((1-V2bar)^2) )
     dKdV=rbind(dKdV1, dKdV2)

     Vkp=t(dKdV)%*%Sigmahat%*%dKdV/N

     return(cbind(kp,Vkp) )
}

# #function EMkp computes the empirical kappa
# EMkp=function(p,r, N, p2_1,p3_1,p4_1,R,J){
# #p is the marginal probability
# #N is the sample size
# #p2_1=P(Y2=1|Y1=1); p3_1=P(Y3=1|Y1=1)
# #r is the number of raters
# #R is the number of replicates of the simulated data generated in order to compute the true Fleiss Kappa
# M=matrix(NA, nrow=R, ncol=2)
# for (m in 1:R){
# S=sim(p,r, N, p2_1,p3_1,p4_1)
# S1=S$Data
# M[m,]=cbind(kappam.fleiss(S1)$value, resampleMN(S1, J)$kphat)
# }
# return(M)
# }

 

#completeKP=EMkp(0.7,4, 100,0.8,0.9,0.85,10, 10)

#kpMN=mean(EMkp(0.7,4, 100,0.8,0.9,0.85,100000,1)[,1]) #check the mean of ONLY the first column of this output, it is can be considered true

#completeKP=EMkp(0.7,4, 100,0.8,0.9,0.85,1000, 1000)

#Delete1 is MCAR
Delete1 <- function(X,Pmiss){
     #p is the probability of assigning missing
     I=nrow(X)
     J=ncol(X)
     Y=X
     for (i in 1:I){
          #for each subject, randomly select half of the time points and delete by 0.5 probability
          for (j in sample(1:J,(J-2),replace=F)){if(rbinom(n=1,size=1,prob=Pmiss)==1){Y[i,j]=NA}}
     }
return(Y)
}

#Delete2 is non-ignorable 1st, depending on P(Y=1)
Delete2 <- function(X,Pmiss){
     #p is the probability of assigning missing
     I=nrow(X)
     J=ncol(X)
     Y=X
     for (i in 1:I){
          #for each subject, randomly select half of the time points and delete by 0.5 probability
          for (j in sample(1:J,(J-2),replace=F)){
               if(Y[i,j]==1){if(rbinom(n=1,size=1,prob=Pmiss)==1){Y[i,j]=NA}}
               }
     }
return(Y)
}

#Delete3 is non-ignorable 2nd, depending on the variablity of rating per subject
Delete3 <- function(X,a,b){
     #p is the probability of assigning missing
     I=nrow(X)
     J=ncol(X)
     Y=X
     for (i in 1:I){
          Pmiss=pnorm(a+b*var(Y[i,]))
        #for each subject, randomly select half of the time points and delete by 0.5 probability
        for (j in sample(1:J,(J-2),replace=F)){
               if(rbinom(n=1,size=1,prob=Pmiss)==1){Y[i,j]=NA}
               }
     }
return(Y)
}

#function EMkp computes the empirical kappa
EMmissingKP=function(p,r, N, p2_1,p3_1,p4_1,p5_1,p6_1,p7_1,p8_1,R,J,Pmiss,a,b){
     #p is the marginal probability
     #N is the sample size
     #p2_1=P(Y2=1|Y1=1); p3_1=P(Y3=1|Y1=1); p4_1=P(Y4=1|Y1=1)
     #r is the number of raters
     #R is the number of replicates of the simulated data generated in order to compute the true Fleiss Kappa
     M=matrix(NA, nrow=R, ncol=18)
     for (m in 1:R){
          S=sim(p,r, N, p2_1,p3_1,p4_1,p5_1,p6_1,p7_1,p8_1)
#delete MCAR
     S1=Delete1(S$Data,Pmiss)
     complete1=S1[apply(is.na(S1),1,sum)==0,] #complete extracts the rows without "NA"
     naive1=FLSkp(complete1)
     propose1=resampleMN(S1, J)
     WGEE1=WGEE(S1)

#delete depends on P(Y=1)
     S2=Delete2(S$Data,Pmiss)
     complete2=S2[apply(is.na(S2),1,sum)==0,] #complete extracts the rows without "NA"
     naive2=FLSkp(complete2)
     propose2=resampleMN(S2, J)
     WGEE2=WGEE(S2)

#Delete depends on the variablity of rating per subject
     S3=Delete3(S$Data,a,b)
     complete3=S3[apply(is.na(S3),1,sum)==0,] #complete extracts the rows without "NA"
     naive3=FLSkp(complete3)
     propose3=resampleMN(S3, J)
     WGEE3=WGEE(S3)


     M[m,]=cbind(naive1[1], naive1[4], propose1$kpbar, propose1$THvar,WGEE1[1],WGEE1[2],
                         naive2[1], naive2[4], propose2$kpbar, propose2$THvar,WGEE2[1],WGEE2[2],
                         naive3[1], naive3[4], propose3$kpbar, propose3$THvar,WGEE3[1],WGEE3[2])
     cat("[", m ,"] ",M[m,],"\n")
          }
     return(M)
}

 

 

p=as.double(commandArgs()[3])#marginal probability
r=as.double(commandArgs()[4])#number of raters
N=as.double(commandArgs()[5])#sample size
p2_1=as.double(commandArgs()[6])#P(Y2=1|Y1=1)
p3_1=as.double(commandArgs()[7])#P(Y3=1|Y1=1)
p4_1=as.double(commandArgs()[8])#P(Y4=1|Y1=1)
p5_1=as.double(commandArgs()[9])#P(Y5=1|Y1=1)
p6_1=as.double(commandArgs()[10])#P(Y6=1|Y1=1)
p7_1=as.double(commandArgs()[11])#P(Y7=1|Y1=1)
p8_1=as.double(commandArgs()[12])#P(Y8=1|Y1=1)

R=as.double(commandArgs()[13])#the number of replicates of the simulated data generated
J=as.double(commandArgs()[14])#the number of times of resampling
Pmiss=as.double(commandArgs()[15])#probability of assigning missingness
b=as.double(commandArgs()[16])#parameter determining probability of assigning missingness
a=-4

set.seed(1)

SimResult=EMmissingKP(p,r, N, p2_1,p3_1,p4_1,p5_1,p6_1,p7_1,p8_1,R,J,Pmiss,a,b)
FLS=round(sim(p,r, N, p2_1,p3_1,p4_1,p5_1,p6_1,p7_1,p8_1)$FLS,2)

filename = paste("/home/xiey5/Method2/result/8raterFLS",FLS,"Pmiss",Pmiss,"N",N,"b",b, ".txt",sep = "")

write.table(SimResult, filename,row.names=F,col.names=F)

#missingKP=EMmissingKP(0.7,4, 100,0.8,0.9,0.85,10, 10,0.3)

#missingKP=EMmissingKP(0.7,4, 100,0.8,0.9,0.85,1000, 1000, 0.3)

Back to Marginal analysis of measurement agreement among multiple raters with non-ignorable missing