#============================================================================================================ # # Licensing: # # Copyright Maria Grünewald and Ola Hössjer (2010) # # This code is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. See . # # This code is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # # Citation: # # The methodology used in this library is described in Grünewald & Hössjer (2011) A General Statistical Framework # for Multistage Designs, ... # Please refer to this paper when using the library. #============================================================================================================ # # Contents: # # This library computes and plots efficiencies for sequential design in logistic regression (Binary outcome) # For description of the model see Example 3 and Example 6 in # Grünewald & Hössjer (2010), A General Statistical Framework for Multistage Designs, ... # # Output: # # File "EfficiencyLogisticRegression.txt" saved to R working directory # containing settings and numerical results from simulation run # Figures shown in R Graphics devise # *Plots comparing cost scearios, two-stage sample (6 plots) # *Plots comparing ascertainment and two-stage (3 plots) # #============================================================================================================ # # The code is intended for use in the software R (version 2.10.0): # R Development Core Team (2008). R: A language and environment for # statistical computing. R Foundation for Statistical Computing, # Vienna, Austria. ISBN 3-900051-07-0, URL http://www.R-project.org. # #============================================================================================================ #============================================================================================================ # Setting parameters for your calculations #============================================================================================================ #------------------------------------------------------------------------------------------------------------ # Parameters specified by user #------------------------------------------------------------------------------------------------------------ # Number of simulated individuals. This number does not refer to the number of subjects in the actual study, # but only affects how well the Monte Calo method approximates theoretical results. N<-500 # Parameter for x~Bin(k,p) # p=exp(alphax)/(1+exp(alphax)) alphax<--1 k<-1 # parameters for y~Bin(2,py) # py=exp(alpha+beta*x)/(1+exp(alpha+beta*x)) alpha<--2 beta<-2 # for sampling scheme with only two regions "cutpoint" determines these. # See function "weights" for further specification cutpoint<-0.5 # Specifying sampling probabilities P(J=2|y=1) pay0s<-c(0.00005,0.0005,0.005,0.02,0.04,0.06,0.08,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9,0.95,0.975,1) #------------------------------------------------------------------------------------------------------------ # The following parameters follow from settings above #------------------------------------------------------------------------------------------------------------ # number of parameters in the model parnum<-3 # number of parameters in explanatory variable xparnum<-1 xgroups<-k+1 ygroups<-2 #============================================================================================================ # Functions to simulate data and create weights for Monte carlo calculations #============================================================================================================ # Simulating data for logistic regression simdatalogistic<-function(N,alphax,xgroups,alphay, betay,ygroups){ sim<-matrix(NA,N,2) #binomial distribution of x with nuber of trials=xgroups-1 x<-rbinom(N,xgroups-1,exp(alphax)/(1+exp(alphax))) #y given x y<-rbinom(N,ygroups-1,exp(alphay+betay*x)/(1+exp(alphay+betay*x))) sim[,1]<-x sim[,2]<-y return(sim) } # The weights below corresponds to the following sampling scheme # P(A|y>=cutpoint)=1 # P(A|y= cutpoint) { ascprob[i,]<-1 yhigh<-yhigh+1 } if (ph[i] < cutpoint){ ascprob[i,]<-pay0 } } return(list(ascprob=ascprob,yhigh=yhigh)) } #============================================================================================================ # Functions to calculate efficiency #============================================================================================================ #------------------------------------------------------------------------------------------------------------ # Function to calculate \psi #------------------------------------------------------------------------------------------------------------ psi<-function(N,alphax,xgroups,alpha, beta,ygroups,x,y){ dalphax<-(exp(alphax)*(x-(xgroups-1))+x)/(1+exp(alphax)) dalpha<-(exp(alpha+beta*x)*(y-(ygroups-1))+y)/(1+exp(alpha+beta*x)) dbeta<-x*(exp(alpha+beta*x)*(y-(ygroups-1))+y)/(1+exp(alpha+beta*x)) dpar<-matrix(c(dalphax,dalpha,dbeta),N,parnum) return(dpar) } #------------------------------------------------------------------------------------------------------------ # Function to calculate P(z) #------------------------------------------------------------------------------------------------------------ Pz<-function(alphax,xgroups,alpha, beta,ygroups,x,y){ dbinom(y, size=(ygroups-1),prob=exp(alpha+beta*x)/(1+exp(alpha+beta*x)))*dbinom(x, size=(xgroups-1), prob=exp(alphax)/(1+exp(alphax))) } #------------------------------------------------------------------------------------------------------------ # Function to calculate P(y) #------------------------------------------------------------------------------------------------------------ Py<-function(alphax,xgroups,alpha, beta,ygroups,x,y){ py<-0 for(j in 1:xgroups){ py<-Pz(alphax,xgroups,alpha, beta,ygroups,(j-1),y)+py } return(py) } #------------------------------------------------------------------------------------------------------------ # Function to calculate variances #------------------------------------------------------------------------------------------------------------ variances<-function(N,alphax,xgroups,alpha, beta,ygroups,x,y,w,Nstar,parnum,psidata,Iy){ #-------------------------------------------------------------------------------------------------------- # Calculating the fist term of the information as E[(f'/f)^2] # (Could be computed more efficiently since symmetry) #-------------------------------------------------------------------------------------------------------- infopart1<-matrix(0,parnum,parnum) for (j in 1:length(y)){ Epsiy2<-matrix(0,parnum,parnum) for (k in 1:xgroups){ psidatajs<-psi(1,alphax,xgroups,alpha, beta,ygroups,(k-1),y[j]) Epsiy2<-w[j,]*Pz(alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])/Py(alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])*t(psidatajs)%*%psidatajs+Epsiy2 } infopart1<-Epsiy2/N+infopart1 } #-------------------------------------------------------------------------------------------------------- # Calculating the second term of the information #-------------------------------------------------------------------------------------------------------- #-------------------------------------------------------------------------------------------------------- #Joint likelihood #-------------------------------------------------------------------------------------------------------- infopart2joint<-matrix(NA,parnum,parnum) for (i in 1:parnum){ for (j in 1:parnum){ infopart2joint[i,j]<-sum(psidata[,i]*w)/N*sum(psidata[,j]*w)/N } } #-------------------------------------------------------------------------------------------------------- #Retrospective likelihood #-------------------------------------------------------------------------------------------------------- sumIyretro<-matrix(0,parnum,parnum) for (j in 1:length(y)){ Epsiy<-matrix(0,1,parnum) for (k in 1:xgroups){ psidatajs<-psi(1,alphax,xgroups,alpha, beta,ygroups,(k-1),y[j]) Epsiy<-Pz(alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])/Py(alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])*psidatajs+Epsiy } sumIyretro<-w[j,]*t(Epsiy)%*%Epsiy+sumIyretro } #-------------------------------------------------------------------------------------------------------- # Calculating joint and twostage information matrices from the parts above #-------------------------------------------------------------------------------------------------------- Iretro<-(infopart1-sumIyretro/N) infojoint<-infopart1-infopart2joint infotwo<-Iy+Iretro #-------------------------------------------------------------------------------------------------------- # Inverting the information matrix to get variance covariance matrix #-------------------------------------------------------------------------------------------------------- variancematrixjoint<-solve(infojoint) variancematrixtwo<-solve(infotwo) #-------------------------------------------------------------------------------------------------------- # Specifying output of function "variancematrix" #-------------------------------------------------------------------------------------------------------- return(list(variancematrixjoint=variancematrixjoint,variancematrixtwo=variancematrixtwo)) } #============================================================================================================ # Running simulations #============================================================================================================ #------------------------------------------------------------------------------------------------------------ # Simulate data for logistic regression #------------------------------------------------------------------------------------------------------------ data<-simdatalogistic(N,alphax,xgroups,alpha, beta,ygroups) x<-data[,1] y<-data[,2] #------------------------------------------------------------------------------------------------------------ # Calculating psi for simulated datausing function "psi" #------------------------------------------------------------------------------------------------------------ psidata<-psi(N,alphax,xgroups,alpha, beta,ygroups,x,y) #------------------------------------------------------------------------------------------------------------ # Calculating \hat{I}_{Y}(\theta) #------------------------------------------------------------------------------------------------------------ sumIy<-matrix(0,parnum,parnum) for (j in 1:length(y)){ Epsiy<-matrix(0,1,parnum) for (k in 1:xgroups){ psidatajs<-psi(1,alphax,xgroups,alpha, beta,ygroups,(k-1),y[j]) Epsiy<-Pz(alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])/Py(alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])*psidatajs+Epsiy } sumIy<-t(Epsiy)%*%Epsiy+ sumIy } Iy<-sumIy/N #------------------------------------------------------------------------------------------------------------ # Running Monte Carlo simulations #------------------------------------------------------------------------------------------------------------ varperpay0s<-matrix(NA,parnum,length(pay0s)) varperpay0sp<-matrix(NA,(parnum-xparnum),length(pay0s)) varperpay0stwo<-matrix(NA,parnum,length(pay0s)) pAhat<-matrix(NA,1,length(pay0s)) yhighs<-matrix(NA,1,length(pay0s)) for (h in 1:length(pay0s)){ weightcalc<-weights(y,pay0s[h],cutpoint) w<-weightcalc$ascprob yhighs[,h]<-weightcalc$yhigh/sum(w) pAhat[,h]<-sum(w)/N # The sample size after ascertainment<- sum of the ascertainment prob. Nstar<-N*pAhat[,h] varperpay0<-variances(N,alphax,xgroups,alpha, beta,ygroups,x,y,w,Nstar,parnum,psidata,Iy) # Ascertainment sample varperpay0s[,h]<-diag(varperpay0$variancematrixjoint) #Twostage sample varperpay0stwo[,h]<-diag(varperpay0$variancematrixtwo) } #------------------------------------------------------------------------------------------------------------ # Saving the results to your working directory #------------------------------------------------------------------------------------------------------------ results<-list(title="Efficiency of samling schemes for logistic regression", created=date(), N=N,alphax=alphax,xgroups=xgroups,alpha=alpha, beta=beta,ygroups=ygroups, cutpoint=cutpoint,pay0s=pay0s,varperpay0s=varperpay0s,varperpay0stwo=varperpay0stwo,pAhat=pAhat,yhighs=yhighs) dput(results, file = "EfficiencyLogisticRegression.txt") #============================================================================================================ # Creating plots #============================================================================================================ #------------------------------------------------------------------------------------------------------------ #Retrieving your results from file in your working directory #------------------------------------------------------------------------------------------------------------ ResultsFromFile<-dget(file = "EfficiencyLogisticRegression.txt") #If you have trouble saving the file you can instead use the following line #ResultsFromFile<-results #------------------------------------------------------------------------------------------------------------ #Preparation for plots #------------------------------------------------------------------------------------------------------------ pay0s<-ResultsFromFile$pay0s varperpay0s<-ResultsFromFile$varperpay0s varperpay0stwo<-ResultsFromFile$varperpay0stwo pAhat<-ResultsFromFile$pAhat c<-matrix(pAhat,length(pAhat),1) yhighs<-ResultsFromFile$yhighs ymitt<-abs(yhighs-0.5) yhalf<-c[ymitt==min(ymitt)] alphaxj<-varperpay0s[1,length(pay0s)]/varperpay0s[1,] alphaj<-varperpay0s[2,length(pay0s)]/varperpay0s[2,] betaj<-varperpay0s[3,length(pay0s)]/varperpay0s[3,] alphaxtwo<-varperpay0s[1,length(pay0s)]/varperpay0stwo[1,] alphatwo<-varperpay0s[2,length(pay0s)]/varperpay0stwo[2,] betatwo<-varperpay0s[3,length(pay0s)]/varperpay0stwo[3,] #================================================================================================ #------------------------------------------------------------------------------------------------ # Plots comparing cost scearios, two-stage sample (Example 3) #------------------------------------------------------------------------------------------------ #================================================================================================ #Upper and lower limit for plot ul<-3 ll<--2 # number of plots in window op<-par(mfrow=c(2,3)) #------------------------------------------------------------------------------------------------ # Specifying costs (specified by user!) #------------------------------------------------------------------------------------------------ cost1<-0 cost2<-1 RAC1<-(c*cost2+(1-c)*cost1)/cost2 cost1<-1/3 cost2<-1 RAC2<-(c*cost2+(1-c)*cost1)/cost2 cost1<-0.5 cost2<-1 RAC3<-(c*cost2+(1-c)*cost1)/cost2 #------------------------------------------------------------------------------------------------ # Plots comparing cost scearios, Efficiency # Two-stage sample #------------------------------------------------------------------------------------------------ #x-axis c<-pay0s eff<-matrix(c(alphaxtwo),length(c),1) matplot(c,eff,type="l",col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Efficiency",main=expression(paste('Efficiency ',hat(gamma)))) lines(c(0,1),c(0,1)) eff<-matrix(c(alphatwo),length(c),1) matplot(c,eff,type="l", col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Efficiency",main=expression(paste('Efficiency ',hat(alpha)))) lines(c(0,1),c(0,1)) eff<-matrix(c(betatwo),length(c),1) matplot(c,eff,type="l",col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Efficiency", main=expression(paste('Efficiency ',hat(beta)))) lines(c(0,1),c(0,1)) #------------------------------------------------------------------------------------------------ # Plots comparing cost scearios, Cost adjusted efficiency # Two-stage sample #------------------------------------------------------------------------------------------------ #x-axis c<-pay0s eff<-matrix(c(alphaxtwo/t(RAC1),alphaxtwo/t(RAC2),alphaxtwo/t(RAC3)),length(c),3) matplot(c,eff,type="l",col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Cost adjusted efficiency", main=expression(paste('Efficiency ',hat(gamma)))) lines(c(0,1),c(1,1)) eff<-matrix(c(alphatwo/t(RAC1),alphatwo/t(RAC2),alphatwo/t(RAC3)),length(c),3) matplot(c,eff,type="l", col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Cost adjusted efficiency", main=expression(paste('Efficiency ',hat(alpha)))) lines(c(0,1),c(1,1)) eff<-matrix(c(betatwo/t(RAC1),betatwo/t(RAC2),betatwo/t(RAC3)),length(c),3) matplot(c,eff,type="l",col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Cost adjusted efficiency", main=expression(paste('Efficiency ',hat(beta)))) lines(c(0,1),c(1,1)) #================================================================================================ #------------------------------------------------------------------------------------------------ # Plots comparing ascertainment and two-stage (Example 6) #------------------------------------------------------------------------------------------------ #================================================================================================ # x-axis c<-pay0s # number of plots in window op<-par(mfrow=c(1,3)) eff<-matrix(c(alphaxj,alphaxtwo),length(c),2) matplot(c,eff,type="l",col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Efficiency", main=expression(paste('Efficiency ',hat(gamma)))) lines(c(0,1),c(0,1)) lines(c(yhalf,yhalf),c(0,1),lty=1) eff<-matrix(c(alphaj,alphatwo),length(c),2) matplot(c,eff,type="l", col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Efficiency", main=expression(paste('Efficiency ',hat(alpha)))) lines(c(0,1),c(0,1)) lines(c(yhalf,yhalf),c(0,1),lty=1) eff<-matrix(c(betaj,betatwo),length(c),2) matplot(c,eff,type="l",col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Efficiency", main=expression(paste('Efficiency ',hat(beta)))) lines(c(0,1),c(0,1)) lines(c(yhalf,yhalf),c(0,1),lty=1)