#============================================================================================================
#
# Licensing:
#
# Copyright Maria Grünewald and Ola Hössjer (2010)
#
# This code is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version. See .
#
# This code is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
#
# Citation:
#
# The methodology used in this library is described in Grünewald & Hössjer (2011) A General Statistical Framework
# for Multistage Designs, ...
# Please refer to this paper when using the library.
#============================================================================================================
#
# Contents:
#
# This library computes and plots efficiencies for sequential design in logistic regression (Binary outcome)
# For description of the model see Example 3 and Example 6 in
# Grünewald & Hössjer (2010), A General Statistical Framework for Multistage Designs, ...
#
# Output:
#
# File "EfficiencyLogisticRegression.txt" saved to R working directory
# containing settings and numerical results from simulation run
# Figures shown in R Graphics devise
# *Plots comparing cost scearios, two-stage sample (6 plots)
# *Plots comparing ascertainment and two-stage (3 plots)
#
#============================================================================================================
#
# The code is intended for use in the software R (version 2.10.0):
# R Development Core Team (2008). R: A language and environment for
# statistical computing. R Foundation for Statistical Computing,
# Vienna, Austria. ISBN 3-900051-07-0, URL http://www.R-project.org.
#
#============================================================================================================
#============================================================================================================
# Setting parameters for your calculations
#============================================================================================================
#------------------------------------------------------------------------------------------------------------
# Parameters specified by user
#------------------------------------------------------------------------------------------------------------
# Number of simulated individuals. This number does not refer to the number of subjects in the actual study,
# but only affects how well the Monte Calo method approximates theoretical results.
N<-500
# Parameter for x~Bin(k,p)
# p=exp(alphax)/(1+exp(alphax))
alphax<--1
k<-1
# parameters for y~Bin(2,py)
# py=exp(alpha+beta*x)/(1+exp(alpha+beta*x))
alpha<--2
beta<-2
# for sampling scheme with only two regions "cutpoint" determines these.
# See function "weights" for further specification
cutpoint<-0.5
# Specifying sampling probabilities P(J=2|y=1)
pay0s<-c(0.00005,0.0005,0.005,0.02,0.04,0.06,0.08,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9,0.95,0.975,1)
#------------------------------------------------------------------------------------------------------------
# The following parameters follow from settings above
#------------------------------------------------------------------------------------------------------------
# number of parameters in the model
parnum<-3
# number of parameters in explanatory variable
xparnum<-1
xgroups<-k+1
ygroups<-2
#============================================================================================================
# Functions to simulate data and create weights for Monte carlo calculations
#============================================================================================================
# Simulating data for logistic regression
simdatalogistic<-function(N,alphax,xgroups,alphay, betay,ygroups){
sim<-matrix(NA,N,2)
#binomial distribution of x with nuber of trials=xgroups-1
x<-rbinom(N,xgroups-1,exp(alphax)/(1+exp(alphax)))
#y given x
y<-rbinom(N,ygroups-1,exp(alphay+betay*x)/(1+exp(alphay+betay*x)))
sim[,1]<-x
sim[,2]<-y
return(sim)
}
# The weights below corresponds to the following sampling scheme
# P(A|y>=cutpoint)=1
# P(A|y= cutpoint) {
ascprob[i,]<-1
yhigh<-yhigh+1
}
if (ph[i] < cutpoint){
ascprob[i,]<-pay0
}
}
return(list(ascprob=ascprob,yhigh=yhigh))
}
#============================================================================================================
# Functions to calculate efficiency
#============================================================================================================
#------------------------------------------------------------------------------------------------------------
# Function to calculate \psi
#------------------------------------------------------------------------------------------------------------
psi<-function(N,alphax,xgroups,alpha, beta,ygroups,x,y){
dalphax<-(exp(alphax)*(x-(xgroups-1))+x)/(1+exp(alphax))
dalpha<-(exp(alpha+beta*x)*(y-(ygroups-1))+y)/(1+exp(alpha+beta*x))
dbeta<-x*(exp(alpha+beta*x)*(y-(ygroups-1))+y)/(1+exp(alpha+beta*x))
dpar<-matrix(c(dalphax,dalpha,dbeta),N,parnum)
return(dpar)
}
#------------------------------------------------------------------------------------------------------------
# Function to calculate P(z)
#------------------------------------------------------------------------------------------------------------
Pz<-function(alphax,xgroups,alpha, beta,ygroups,x,y){
dbinom(y, size=(ygroups-1),prob=exp(alpha+beta*x)/(1+exp(alpha+beta*x)))*dbinom(x, size=(xgroups-1), prob=exp(alphax)/(1+exp(alphax)))
}
#------------------------------------------------------------------------------------------------------------
# Function to calculate P(y)
#------------------------------------------------------------------------------------------------------------
Py<-function(alphax,xgroups,alpha, beta,ygroups,x,y){
py<-0
for(j in 1:xgroups){
py<-Pz(alphax,xgroups,alpha, beta,ygroups,(j-1),y)+py
}
return(py)
}
#------------------------------------------------------------------------------------------------------------
# Function to calculate variances
#------------------------------------------------------------------------------------------------------------
variances<-function(N,alphax,xgroups,alpha, beta,ygroups,x,y,w,Nstar,parnum,psidata,Iy){
#--------------------------------------------------------------------------------------------------------
# Calculating the fist term of the information as E[(f'/f)^2]
# (Could be computed more efficiently since symmetry)
#--------------------------------------------------------------------------------------------------------
infopart1<-matrix(0,parnum,parnum)
for (j in 1:length(y)){
Epsiy2<-matrix(0,parnum,parnum)
for (k in 1:xgroups){
psidatajs<-psi(1,alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])
Epsiy2<-w[j,]*Pz(alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])/Py(alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])*t(psidatajs)%*%psidatajs+Epsiy2
}
infopart1<-Epsiy2/N+infopart1
}
#--------------------------------------------------------------------------------------------------------
# Calculating the second term of the information
#--------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------
#Joint likelihood
#--------------------------------------------------------------------------------------------------------
infopart2joint<-matrix(NA,parnum,parnum)
for (i in 1:parnum){
for (j in 1:parnum){
infopart2joint[i,j]<-sum(psidata[,i]*w)/N*sum(psidata[,j]*w)/N
}
}
#--------------------------------------------------------------------------------------------------------
#Retrospective likelihood
#--------------------------------------------------------------------------------------------------------
sumIyretro<-matrix(0,parnum,parnum)
for (j in 1:length(y)){
Epsiy<-matrix(0,1,parnum)
for (k in 1:xgroups){
psidatajs<-psi(1,alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])
Epsiy<-Pz(alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])/Py(alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])*psidatajs+Epsiy
}
sumIyretro<-w[j,]*t(Epsiy)%*%Epsiy+sumIyretro
}
#--------------------------------------------------------------------------------------------------------
# Calculating joint and twostage information matrices from the parts above
#--------------------------------------------------------------------------------------------------------
Iretro<-(infopart1-sumIyretro/N)
infojoint<-infopart1-infopart2joint
infotwo<-Iy+Iretro
#--------------------------------------------------------------------------------------------------------
# Inverting the information matrix to get variance covariance matrix
#--------------------------------------------------------------------------------------------------------
variancematrixjoint<-solve(infojoint)
variancematrixtwo<-solve(infotwo)
#--------------------------------------------------------------------------------------------------------
# Specifying output of function "variancematrix"
#--------------------------------------------------------------------------------------------------------
return(list(variancematrixjoint=variancematrixjoint,variancematrixtwo=variancematrixtwo))
}
#============================================================================================================
# Running simulations
#============================================================================================================
#------------------------------------------------------------------------------------------------------------
# Simulate data for logistic regression
#------------------------------------------------------------------------------------------------------------
data<-simdatalogistic(N,alphax,xgroups,alpha, beta,ygroups)
x<-data[,1]
y<-data[,2]
#------------------------------------------------------------------------------------------------------------
# Calculating psi for simulated datausing function "psi"
#------------------------------------------------------------------------------------------------------------
psidata<-psi(N,alphax,xgroups,alpha, beta,ygroups,x,y)
#------------------------------------------------------------------------------------------------------------
# Calculating \hat{I}_{Y}(\theta)
#------------------------------------------------------------------------------------------------------------
sumIy<-matrix(0,parnum,parnum)
for (j in 1:length(y)){
Epsiy<-matrix(0,1,parnum)
for (k in 1:xgroups){
psidatajs<-psi(1,alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])
Epsiy<-Pz(alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])/Py(alphax,xgroups,alpha, beta,ygroups,(k-1),y[j])*psidatajs+Epsiy
}
sumIy<-t(Epsiy)%*%Epsiy+ sumIy
}
Iy<-sumIy/N
#------------------------------------------------------------------------------------------------------------
# Running Monte Carlo simulations
#------------------------------------------------------------------------------------------------------------
varperpay0s<-matrix(NA,parnum,length(pay0s))
varperpay0sp<-matrix(NA,(parnum-xparnum),length(pay0s))
varperpay0stwo<-matrix(NA,parnum,length(pay0s))
pAhat<-matrix(NA,1,length(pay0s))
yhighs<-matrix(NA,1,length(pay0s))
for (h in 1:length(pay0s)){
weightcalc<-weights(y,pay0s[h],cutpoint)
w<-weightcalc$ascprob
yhighs[,h]<-weightcalc$yhigh/sum(w)
pAhat[,h]<-sum(w)/N
# The sample size after ascertainment<- sum of the ascertainment prob.
Nstar<-N*pAhat[,h]
varperpay0<-variances(N,alphax,xgroups,alpha, beta,ygroups,x,y,w,Nstar,parnum,psidata,Iy)
# Ascertainment sample
varperpay0s[,h]<-diag(varperpay0$variancematrixjoint)
#Twostage sample
varperpay0stwo[,h]<-diag(varperpay0$variancematrixtwo)
}
#------------------------------------------------------------------------------------------------------------
# Saving the results to your working directory
#------------------------------------------------------------------------------------------------------------
results<-list(title="Efficiency of samling schemes for logistic regression", created=date(), N=N,alphax=alphax,xgroups=xgroups,alpha=alpha, beta=beta,ygroups=ygroups, cutpoint=cutpoint,pay0s=pay0s,varperpay0s=varperpay0s,varperpay0stwo=varperpay0stwo,pAhat=pAhat,yhighs=yhighs)
dput(results, file = "EfficiencyLogisticRegression.txt")
#============================================================================================================
# Creating plots
#============================================================================================================
#------------------------------------------------------------------------------------------------------------
#Retrieving your results from file in your working directory
#------------------------------------------------------------------------------------------------------------
ResultsFromFile<-dget(file = "EfficiencyLogisticRegression.txt")
#If you have trouble saving the file you can instead use the following line
#ResultsFromFile<-results
#------------------------------------------------------------------------------------------------------------
#Preparation for plots
#------------------------------------------------------------------------------------------------------------
pay0s<-ResultsFromFile$pay0s
varperpay0s<-ResultsFromFile$varperpay0s
varperpay0stwo<-ResultsFromFile$varperpay0stwo
pAhat<-ResultsFromFile$pAhat
c<-matrix(pAhat,length(pAhat),1)
yhighs<-ResultsFromFile$yhighs
ymitt<-abs(yhighs-0.5)
yhalf<-c[ymitt==min(ymitt)]
alphaxj<-varperpay0s[1,length(pay0s)]/varperpay0s[1,]
alphaj<-varperpay0s[2,length(pay0s)]/varperpay0s[2,]
betaj<-varperpay0s[3,length(pay0s)]/varperpay0s[3,]
alphaxtwo<-varperpay0s[1,length(pay0s)]/varperpay0stwo[1,]
alphatwo<-varperpay0s[2,length(pay0s)]/varperpay0stwo[2,]
betatwo<-varperpay0s[3,length(pay0s)]/varperpay0stwo[3,]
#================================================================================================
#------------------------------------------------------------------------------------------------
# Plots comparing cost scearios, two-stage sample (Example 3)
#------------------------------------------------------------------------------------------------
#================================================================================================
#Upper and lower limit for plot
ul<-3
ll<--2
# number of plots in window
op<-par(mfrow=c(2,3))
#------------------------------------------------------------------------------------------------
# Specifying costs (specified by user!)
#------------------------------------------------------------------------------------------------
cost1<-0
cost2<-1
RAC1<-(c*cost2+(1-c)*cost1)/cost2
cost1<-1/3
cost2<-1
RAC2<-(c*cost2+(1-c)*cost1)/cost2
cost1<-0.5
cost2<-1
RAC3<-(c*cost2+(1-c)*cost1)/cost2
#------------------------------------------------------------------------------------------------
# Plots comparing cost scearios, Efficiency
# Two-stage sample
#------------------------------------------------------------------------------------------------
#x-axis
c<-pay0s
eff<-matrix(c(alphaxtwo),length(c),1)
matplot(c,eff,type="l",col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Efficiency",main=expression(paste('Efficiency ',hat(gamma))))
lines(c(0,1),c(0,1))
eff<-matrix(c(alphatwo),length(c),1)
matplot(c,eff,type="l", col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Efficiency",main=expression(paste('Efficiency ',hat(alpha))))
lines(c(0,1),c(0,1))
eff<-matrix(c(betatwo),length(c),1)
matplot(c,eff,type="l",col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Efficiency", main=expression(paste('Efficiency ',hat(beta))))
lines(c(0,1),c(0,1))
#------------------------------------------------------------------------------------------------
# Plots comparing cost scearios, Cost adjusted efficiency
# Two-stage sample
#------------------------------------------------------------------------------------------------
#x-axis
c<-pay0s
eff<-matrix(c(alphaxtwo/t(RAC1),alphaxtwo/t(RAC2),alphaxtwo/t(RAC3)),length(c),3)
matplot(c,eff,type="l",col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Cost adjusted efficiency", main=expression(paste('Efficiency ',hat(gamma))))
lines(c(0,1),c(1,1))
eff<-matrix(c(alphatwo/t(RAC1),alphatwo/t(RAC2),alphatwo/t(RAC3)),length(c),3)
matplot(c,eff,type="l", col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Cost adjusted efficiency", main=expression(paste('Efficiency ',hat(alpha))))
lines(c(0,1),c(1,1))
eff<-matrix(c(betatwo/t(RAC1),betatwo/t(RAC2),betatwo/t(RAC3)),length(c),3)
matplot(c,eff,type="l",col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Cost adjusted efficiency", main=expression(paste('Efficiency ',hat(beta))))
lines(c(0,1),c(1,1))
#================================================================================================
#------------------------------------------------------------------------------------------------
# Plots comparing ascertainment and two-stage (Example 6)
#------------------------------------------------------------------------------------------------
#================================================================================================
# x-axis
c<-pay0s
# number of plots in window
op<-par(mfrow=c(1,3))
eff<-matrix(c(alphaxj,alphaxtwo),length(c),2)
matplot(c,eff,type="l",col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Efficiency", main=expression(paste('Efficiency ',hat(gamma))))
lines(c(0,1),c(0,1))
lines(c(yhalf,yhalf),c(0,1),lty=1)
eff<-matrix(c(alphaj,alphatwo),length(c),2)
matplot(c,eff,type="l", col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Efficiency", main=expression(paste('Efficiency ',hat(alpha))))
lines(c(0,1),c(0,1))
lines(c(yhalf,yhalf),c(0,1),lty=1)
eff<-matrix(c(betaj,betatwo),length(c),2)
matplot(c,eff,type="l",col=1,xlab=expression(paste('P(J=2|Y=0)')),ylab="Efficiency", main=expression(paste('Efficiency ',hat(beta))))
lines(c(0,1),c(0,1))
lines(c(yhalf,yhalf),c(0,1),lty=1)