#============================================================================================================ # # Licensing: # # Copyright Maria Grünewald and Ola Hössjer (2010) # # This code is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. See . # # This code is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # # Citation: # # The methodology used in this library is described in Grünewald & Hössjer (2011) A General Statistical Framework # for Multistage Designs, ... # Please refer to this paper when using the library. #============================================================================================================# # Contents: # # This library computes and plots efficiencies for sequential design in linear regression (Gaussian outcome) # For description of the model see Example 4 in # Grünewald & Hössjer (2011) A General Statistical Framework # for Multistage Designs, ... # # # Output: # # File "EfficiencyLinearRegression.txt" saved to R working directory # containing settings and numerical results from simulation run # Figures shown in R Graphics devise # *Efficiency plot for regression parameter hat(beta) # *Cost efficiency plot for regression parameter hat(beta) # #============================================================================================================ # # The code is intended for use in the software R (version 2.10.0): # R Development Core Team (2008). R: A language and environment for # statistical computing. R Foundation for Statistical Computing, # Vienna, Austria. ISBN 3-900051-07-0, URL http://www.R-project.org. # #============================================================================================================ #============================================================================================================ # Setting parameters for your calculations #============================================================================================================ #------------------------------------------------------------------------------------------------------------ # Parameters specified by user #------------------------------------------------------------------------------------------------------------ # Number of simulated individuals. This number does not refer to the number of subjects in the actual study, # but only affects how well the Monte Calo method approximates theoretical results. N<-500 # Parameter for x~Bin(1,p) # p=exp(alphax)/(1+exp(alphax)) alphax<-2.037135 # parameters for y~N(mu,sigma) # mu=alpha+beta*x alpha<-3.220368 beta<-0.013593802 sigma<-sqrt(0.02858095) # for sampling scheme with only two regions "cutpoint" determines these. # See function "weights" for further specification cutpoint<-log(30) # Specifying sampling probabilities P(J=2|y=1) pay0s=c(0.00005,0.0005,0.005,0.02,0.04,0.06,0.08,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9,0.95,0.975,1) #------------------------------------------------------------------------------------------------------------ # The following parameters follow from settings above #------------------------------------------------------------------------------------------------------------ #number of parameters in the model parnum<-4 # number of parameters in explanatory variable xparnum<-1 parnumfirsty<-xparnum+1 xgroups<-2 ygroups<-2 p<-exp(alphax)/(1+exp(alphax)) #============================================================================================================ # Functions to simulate data and create weights for Monte carlo calculations #============================================================================================================ # Simulating data for linear regression simdata<-function(N,p,xgroups,effekt,meanaa,sigma2){ sim<-matrix(8,N,2) # x~Bin(xgroups-1,p) x<-rbinom(N,xgroups-1,p) #y|x~N(meanaa+x*effekt,sqrt(sigma2)) y<-rnorm(N,meanaa+x*effekt,sqrt(sigma2)) sim[,1]<-x sim[,2]<-y return(sim) } # The weights below corresponds to the following sampling scheme # P(A|y>=cutpoint)=1 # P(A|y= cutpoint) { ascprob[i,]<-1 yhigh<-yhigh+1 } if (y[i] < cutpoint){ ascprob[i,]<-pay0 } } return(list(ascprob=ascprob,yhigh=yhigh)) } #============================================================================================================ # Functions to calculate efficiency #============================================================================================================ #------------------------------------------------------------------------------------------------------------ # Function to calculate \psi #------------------------------------------------------------------------------------------------------------ psi<-function(N,alphax,xgroups,alpha, beta,sigma,x,y){ dalphax<-(exp(alphax)*(x-(xgroups-1))+x)/(1+exp(alphax)) dalpha<-(-(alpha+beta*x-y))/sigma^2 dbeta<-x*(-(alpha+beta*x-y))/sigma^2 dsigma<-(-sigma^2+(alpha+beta*x-y)^2)/sigma^3 dpar<-matrix(c(dalphax,dalpha,dbeta,dsigma),N,parnum) return(dpar) } #------------------------------------------------------------------------------------------------------------ # Function to calculate P(z) #------------------------------------------------------------------------------------------------------------ Pz<-function(alphax,xgroups, alpha, beta,sigma,x,y){ dnorm(y, mean=(alpha+beta*x), sd=sigma)*dbinom(x, size=(xgroups-1), prob=exp(alphax)/(1+exp(alphax))) } #------------------------------------------------------------------------------------------------------------ # Function to calculate P(y) #------------------------------------------------------------------------------------------------------------ Py<-function(alphax,xgroups, alpha, beta,sigma,x,y){ py<-0 for(j in 1:xgroups){ py<-Pz(alphax,xgroups, alpha, beta,sigma,(j-1),y)+py } return(py) } #------------------------------------------------------------------------------------------------------------ # Function to calculate variances #------------------------------------------------------------------------------------------------------------ variances<-function(N,alphax,beta,alpha,sigma,xgroups,x,y,w,parnum,psidata,Iy){ pA<-sum(w)/N #-------------------------------------------------------------------------------------------------------- # Calculating the fist term of the information as E[(f'/f)^2] # (Could be computed more efficiently since symmetry) #-------------------------------------------------------------------------------------------------------- infopart1<-matrix(0,parnum,parnum) for (j in 1:length(y)){ Epsiy2<-matrix(0,parnum,parnum) for (k in 1:xgroups){ psidatajs<-psi(1,alphax,xgroups,alpha, beta,sigma,(k-1),y[j]) Epsiy2<-w[j,]*Pz(alphax,xgroups, alpha, beta,sigma,(k-1),y[j])/Py(alphax,xgroups, alpha, beta,sigma,(k-1),y[j])*t(psidatajs)%*%psidatajs+Epsiy2 } infopart1<-Epsiy2/N+infopart1 } #-------------------------------------------------------------------------------------------------------- # Calculating the second term of the information #-------------------------------------------------------------------------------------------------------- #-------------------------------------------------------------------------------------------------------- #Joint likelihood #-------------------------------------------------------------------------------------------------------- infopart2joint<-matrix(NA,parnum,parnum) for (i in 1:parnum){ for (j in 1:parnum){ infopart2joint[i,j]<-sum(psidata[,i]*w)/N*sum(psidata[,j]*w)/N } } #-------------------------------------------------------------------------------------------------------- #Retrospective likelihood #-------------------------------------------------------------------------------------------------------- sumIyretro<-matrix(0,parnum,parnum) for (j in 1:length(y)){ Epsiy<-matrix(0,1,parnum) for (k in 1:xgroups){ psidatajs<-psi(1,alphax,xgroups,alpha, beta,sigma,(k-1),y[j]) Epsiy<-Pz(alphax,xgroups, alpha, beta,sigma,(k-1),y[j])/Py(alphax,xgroups, alpha, beta,sigma,(k-1),y[j])*psidatajs+Epsiy } sumIyretro<-w[j,]*t(Epsiy)%*%Epsiy+sumIyretro } Iretro<-(infopart1-sumIyretro/N) #-------------------------------------------------------------------------------------------------------- # Calculating joint and twostage information matrices from the parts above #-------------------------------------------------------------------------------------------------------- infojoint<-infopart1-infopart2joint infotwo<-Iy+Iretro infoSEM<-infojoint+infopart2joint*pA/(1-pA) #-------------------------------------------------------------------------------------------------------- # Inverting the information matrix to get variance covariance matrix #-------------------------------------------------------------------------------------------------------- variancematrixjoint<-solve(infojoint) variancematrixtwo<-solve(infotwo) variansmatrixSEM<-solve(infoSEM) #-------------------------------------------------------------------------------------------------------- # Specifying output of function "variancematrix" #-------------------------------------------------------------------------------------------------------- return(list(variancematrixjoint=variancematrixjoint,variancematrixtwo=variancematrixtwo,variansmatrixSEM=variansmatrixSEM)) } #============================================================================================================ # Running simulations #============================================================================================================ #------------------------------------------------------------------------------------------------------------ # Simulate data for linear regression #------------------------------------------------------------------------------------------------------------ data<-simdata(N,p,xgroups,beta,alpha,sigma^2) x<-data[,1] y<-data[,2] #------------------------------------------------------------------------------------------------------------ # Calculating psi for simulated datausing function "psi" #------------------------------------------------------------------------------------------------------------ psidata<-psi(N,alphax,xgroups,alpha, beta,sigma,x,y) #------------------------------------------------------------------------------------------------------------ # Calculating \hat{I}_{Y}(\theta) #------------------------------------------------------------------------------------------------------------ sumIy<-matrix(0,parnum,parnum) for (j in 1:length(y)){ Epsiy<-matrix(0,1,parnum) for (k in 1:xgroups){ psidatajs<-psi(1,alphax,xgroups,alpha, beta,sigma,(k-1),y[j]) Epsiy<-Pz(alphax,xgroups, alpha, beta,sigma,(k-1),y[j])/Py(alphax,xgroups, alpha, beta,sigma,(k-1),y[j])*psidatajs+Epsiy } sumIy<-t(Epsiy)%*%Epsiy+ sumIy } Iy<-sumIy/N #------------------------------------------------------------------------------------------------------------ # Running Monte Carlo simulations #------------------------------------------------------------------------------------------------------------ varperpay0s<-matrix(NA,parnum,length(pay0s)) varperpay0stwo<-matrix(NA,parnum,length(pay0s)) # (The likelihood below is not plotted in this paper) varperpay0sSEM<-matrix(NA,parnum,length(pay0s)) pAhat<-matrix(NA,1,length(pay0s)) yhighs<-matrix(NA,1,length(pay0s)) for (h in 1:length(pay0s)){ weightcalc<-weights(y,pay0s[h],cutpoint) w<-weightcalc$ascprob yhighs[,h]<-weightcalc$yhigh/sum(w) pAhat[,h]<-sum(w)/N varperpay0<-variances(N,alphax,beta,alpha,sigma,xgroups,x,y,w,parnum,psidata,Iy) #Ascertainment sample varperpay0s[,h]<-diag(varperpay0$variancematrixjoint) #Twostage sample varperpay0stwo[,h]<-diag(varperpay0$variancematrixtwo) varperpay0sSEM[,h]<-diag(varperpay0$variansmatrixSEM) } #------------------------------------------------------------------------------------------------------------ # Saving the results to your working directory #------------------------------------------------------------------------------------------------------------ results<-list(title="Efficiency of samling schemes for linear regression", created=date(), N=N,alphax=alphax,xgroups=xgroups,alpha=alpha, beta=beta,ygroups=ygroups, cutpoint=cutpoint,pay0s=pay0s,varperpay0s=varperpay0s,varperpay0sSEM=varperpay0sSEM,varperpay0stwo=varperpay0stwo,pAhat=pAhat,yhighs=yhighs) dput(results, file = "EfficiencyLinearRegression.txt") #============================================================================================================ # Creating plots #============================================================================================================ #------------------------------------------------------------------------------------------------------------ #Retrieving your results from file in your working directory #------------------------------------------------------------------------------------------------------------ ResultsFromFile<-dget(file = "EfficiencyLinearRegression.txt") #If you have trouble saving the file you can instead use the following line #ResultsFromFile<-results #------------------------------------------------------------------------------------------------------------ # Preparation for plots #----------------------------------------------------------------------------------------------------------- LinRegtwo<-ResultsFromFile$varperpay0stwo pay0s<-ResultsFromFile$pay0s c<-ResultsFromFile$pAhat betaLinReg<-LinRegtwo[3,length(pay0s)]/LinRegtwo[3,] cLinReg<-matrix(ResultsFromFile$pay0s,length(ResultsFromFile$pay0s),1) #------------------------------------------------------------------------------------------------------------ # Chosing your cost functions (three different cost functions) #------------------------------------------------------------------------------------------------------------ cost1<-0 cost2<-1 RAC1<-((c*cost2+(1-c)*cost1)/cost2) cost1<-1/3 cost2<-1 RAC2<-((c*cost2+(1-c)*cost1)/cost2) cost1<-0.5 cost2<-1 RAC3<-((c*cost2+(1-c)*cost1)/cost2) #---------------------------------------------------------------------------------------- # Plots efficiency hat(beta) #---------------------------------------------------------------------------------------- op<-par(mfrow=c(2,2)) # Efficiency op<-par(mfrow=c(1,2)) eff<-matrix(c(betaLinReg),length(cLinReg),1) matplot(cLinReg,eff,type="l",col=1,xlab=expression(paste('P(J=2| BMI<30)')),ylab="Efficiency") #abline(reg) lines(c(0,1),c(0,1)) title('Efficiency of regression parameter') # Cost adjusted efficiency ul<-max(c(betaLinReg/t(RAC1),betaLinReg/t(RAC2),betaLinReg/t(RAC3)))+0.1 eff<-matrix(c(betaLinReg/t(RAC1),betaLinReg/t(RAC2),betaLinReg/t(RAC3)),length(c),3) matplot(cLinReg,eff,type="l",col=1,xlim=c(0,1),ylim=c(0,ul),xlab=expression(paste('P(J=2| BMI<30)')),ylab="Cost adjusted efficiency") lines(c(0,1),c(1,1)) title('CE of regression parameter')