#============================================================================================================
#
# Licensing:
#
# Copyright Maria Grünewald and Ola Hössjer (2010)
#
# This code is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version. See .
#
# This code is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
#
# Citation:
#
# The methodology used in this library is described in Grünewald & Hössjer (2011) A General Statistical Framework
# for Multistage Designs, ...
# Please refer to this paper when using the library.
#============================================================================================================#
# Contents:
#
# This library computes and plots efficiencies for sequential design in linear regression (Gaussian outcome)
# For description of the model see Example 4 in
# Grünewald & Hössjer (2011) A General Statistical Framework
# for Multistage Designs, ...
#
#
# Output:
#
# File "EfficiencyLinearRegression.txt" saved to R working directory
# containing settings and numerical results from simulation run
# Figures shown in R Graphics devise
# *Efficiency plot for regression parameter hat(beta)
# *Cost efficiency plot for regression parameter hat(beta)
#
#============================================================================================================
#
# The code is intended for use in the software R (version 2.10.0):
# R Development Core Team (2008). R: A language and environment for
# statistical computing. R Foundation for Statistical Computing,
# Vienna, Austria. ISBN 3-900051-07-0, URL http://www.R-project.org.
#
#============================================================================================================
#============================================================================================================
# Setting parameters for your calculations
#============================================================================================================
#------------------------------------------------------------------------------------------------------------
# Parameters specified by user
#------------------------------------------------------------------------------------------------------------
# Number of simulated individuals. This number does not refer to the number of subjects in the actual study,
# but only affects how well the Monte Calo method approximates theoretical results.
N<-500
# Parameter for x~Bin(1,p)
# p=exp(alphax)/(1+exp(alphax))
alphax<-2.037135
# parameters for y~N(mu,sigma)
# mu=alpha+beta*x
alpha<-3.220368
beta<-0.013593802
sigma<-sqrt(0.02858095)
# for sampling scheme with only two regions "cutpoint" determines these.
# See function "weights" for further specification
cutpoint<-log(30)
# Specifying sampling probabilities P(J=2|y=1)
pay0s=c(0.00005,0.0005,0.005,0.02,0.04,0.06,0.08,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9,0.95,0.975,1)
#------------------------------------------------------------------------------------------------------------
# The following parameters follow from settings above
#------------------------------------------------------------------------------------------------------------
#number of parameters in the model
parnum<-4
# number of parameters in explanatory variable
xparnum<-1
parnumfirsty<-xparnum+1
xgroups<-2
ygroups<-2
p<-exp(alphax)/(1+exp(alphax))
#============================================================================================================
# Functions to simulate data and create weights for Monte carlo calculations
#============================================================================================================
# Simulating data for linear regression
simdata<-function(N,p,xgroups,effekt,meanaa,sigma2){
sim<-matrix(8,N,2)
# x~Bin(xgroups-1,p)
x<-rbinom(N,xgroups-1,p)
#y|x~N(meanaa+x*effekt,sqrt(sigma2))
y<-rnorm(N,meanaa+x*effekt,sqrt(sigma2))
sim[,1]<-x
sim[,2]<-y
return(sim)
}
# The weights below corresponds to the following sampling scheme
# P(A|y>=cutpoint)=1
# P(A|y= cutpoint) {
ascprob[i,]<-1
yhigh<-yhigh+1
}
if (y[i] < cutpoint){
ascprob[i,]<-pay0
}
}
return(list(ascprob=ascprob,yhigh=yhigh))
}
#============================================================================================================
# Functions to calculate efficiency
#============================================================================================================
#------------------------------------------------------------------------------------------------------------
# Function to calculate \psi
#------------------------------------------------------------------------------------------------------------
psi<-function(N,alphax,xgroups,alpha, beta,sigma,x,y){
dalphax<-(exp(alphax)*(x-(xgroups-1))+x)/(1+exp(alphax))
dalpha<-(-(alpha+beta*x-y))/sigma^2
dbeta<-x*(-(alpha+beta*x-y))/sigma^2
dsigma<-(-sigma^2+(alpha+beta*x-y)^2)/sigma^3
dpar<-matrix(c(dalphax,dalpha,dbeta,dsigma),N,parnum)
return(dpar)
}
#------------------------------------------------------------------------------------------------------------
# Function to calculate P(z)
#------------------------------------------------------------------------------------------------------------
Pz<-function(alphax,xgroups, alpha, beta,sigma,x,y){
dnorm(y, mean=(alpha+beta*x), sd=sigma)*dbinom(x, size=(xgroups-1), prob=exp(alphax)/(1+exp(alphax)))
}
#------------------------------------------------------------------------------------------------------------
# Function to calculate P(y)
#------------------------------------------------------------------------------------------------------------
Py<-function(alphax,xgroups, alpha, beta,sigma,x,y){
py<-0
for(j in 1:xgroups){
py<-Pz(alphax,xgroups, alpha, beta,sigma,(j-1),y)+py
}
return(py)
}
#------------------------------------------------------------------------------------------------------------
# Function to calculate variances
#------------------------------------------------------------------------------------------------------------
variances<-function(N,alphax,beta,alpha,sigma,xgroups,x,y,w,parnum,psidata,Iy){
pA<-sum(w)/N
#--------------------------------------------------------------------------------------------------------
# Calculating the fist term of the information as E[(f'/f)^2]
# (Could be computed more efficiently since symmetry)
#--------------------------------------------------------------------------------------------------------
infopart1<-matrix(0,parnum,parnum)
for (j in 1:length(y)){
Epsiy2<-matrix(0,parnum,parnum)
for (k in 1:xgroups){
psidatajs<-psi(1,alphax,xgroups,alpha, beta,sigma,(k-1),y[j])
Epsiy2<-w[j,]*Pz(alphax,xgroups, alpha, beta,sigma,(k-1),y[j])/Py(alphax,xgroups, alpha, beta,sigma,(k-1),y[j])*t(psidatajs)%*%psidatajs+Epsiy2
}
infopart1<-Epsiy2/N+infopart1
}
#--------------------------------------------------------------------------------------------------------
# Calculating the second term of the information
#--------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------
#Joint likelihood
#--------------------------------------------------------------------------------------------------------
infopart2joint<-matrix(NA,parnum,parnum)
for (i in 1:parnum){
for (j in 1:parnum){
infopart2joint[i,j]<-sum(psidata[,i]*w)/N*sum(psidata[,j]*w)/N
}
}
#--------------------------------------------------------------------------------------------------------
#Retrospective likelihood
#--------------------------------------------------------------------------------------------------------
sumIyretro<-matrix(0,parnum,parnum)
for (j in 1:length(y)){
Epsiy<-matrix(0,1,parnum)
for (k in 1:xgroups){
psidatajs<-psi(1,alphax,xgroups,alpha, beta,sigma,(k-1),y[j])
Epsiy<-Pz(alphax,xgroups, alpha, beta,sigma,(k-1),y[j])/Py(alphax,xgroups, alpha, beta,sigma,(k-1),y[j])*psidatajs+Epsiy
}
sumIyretro<-w[j,]*t(Epsiy)%*%Epsiy+sumIyretro
}
Iretro<-(infopart1-sumIyretro/N)
#--------------------------------------------------------------------------------------------------------
# Calculating joint and twostage information matrices from the parts above
#--------------------------------------------------------------------------------------------------------
infojoint<-infopart1-infopart2joint
infotwo<-Iy+Iretro
infoSEM<-infojoint+infopart2joint*pA/(1-pA)
#--------------------------------------------------------------------------------------------------------
# Inverting the information matrix to get variance covariance matrix
#--------------------------------------------------------------------------------------------------------
variancematrixjoint<-solve(infojoint)
variancematrixtwo<-solve(infotwo)
variansmatrixSEM<-solve(infoSEM)
#--------------------------------------------------------------------------------------------------------
# Specifying output of function "variancematrix"
#--------------------------------------------------------------------------------------------------------
return(list(variancematrixjoint=variancematrixjoint,variancematrixtwo=variancematrixtwo,variansmatrixSEM=variansmatrixSEM))
}
#============================================================================================================
# Running simulations
#============================================================================================================
#------------------------------------------------------------------------------------------------------------
# Simulate data for linear regression
#------------------------------------------------------------------------------------------------------------
data<-simdata(N,p,xgroups,beta,alpha,sigma^2)
x<-data[,1]
y<-data[,2]
#------------------------------------------------------------------------------------------------------------
# Calculating psi for simulated datausing function "psi"
#------------------------------------------------------------------------------------------------------------
psidata<-psi(N,alphax,xgroups,alpha, beta,sigma,x,y)
#------------------------------------------------------------------------------------------------------------
# Calculating \hat{I}_{Y}(\theta)
#------------------------------------------------------------------------------------------------------------
sumIy<-matrix(0,parnum,parnum)
for (j in 1:length(y)){
Epsiy<-matrix(0,1,parnum)
for (k in 1:xgroups){
psidatajs<-psi(1,alphax,xgroups,alpha, beta,sigma,(k-1),y[j])
Epsiy<-Pz(alphax,xgroups, alpha, beta,sigma,(k-1),y[j])/Py(alphax,xgroups, alpha, beta,sigma,(k-1),y[j])*psidatajs+Epsiy
}
sumIy<-t(Epsiy)%*%Epsiy+ sumIy
}
Iy<-sumIy/N
#------------------------------------------------------------------------------------------------------------
# Running Monte Carlo simulations
#------------------------------------------------------------------------------------------------------------
varperpay0s<-matrix(NA,parnum,length(pay0s))
varperpay0stwo<-matrix(NA,parnum,length(pay0s))
# (The likelihood below is not plotted in this paper)
varperpay0sSEM<-matrix(NA,parnum,length(pay0s))
pAhat<-matrix(NA,1,length(pay0s))
yhighs<-matrix(NA,1,length(pay0s))
for (h in 1:length(pay0s)){
weightcalc<-weights(y,pay0s[h],cutpoint)
w<-weightcalc$ascprob
yhighs[,h]<-weightcalc$yhigh/sum(w)
pAhat[,h]<-sum(w)/N
varperpay0<-variances(N,alphax,beta,alpha,sigma,xgroups,x,y,w,parnum,psidata,Iy)
#Ascertainment sample
varperpay0s[,h]<-diag(varperpay0$variancematrixjoint)
#Twostage sample
varperpay0stwo[,h]<-diag(varperpay0$variancematrixtwo)
varperpay0sSEM[,h]<-diag(varperpay0$variansmatrixSEM)
}
#------------------------------------------------------------------------------------------------------------
# Saving the results to your working directory
#------------------------------------------------------------------------------------------------------------
results<-list(title="Efficiency of samling schemes for linear regression", created=date(), N=N,alphax=alphax,xgroups=xgroups,alpha=alpha, beta=beta,ygroups=ygroups, cutpoint=cutpoint,pay0s=pay0s,varperpay0s=varperpay0s,varperpay0sSEM=varperpay0sSEM,varperpay0stwo=varperpay0stwo,pAhat=pAhat,yhighs=yhighs)
dput(results, file = "EfficiencyLinearRegression.txt")
#============================================================================================================
# Creating plots
#============================================================================================================
#------------------------------------------------------------------------------------------------------------
#Retrieving your results from file in your working directory
#------------------------------------------------------------------------------------------------------------
ResultsFromFile<-dget(file = "EfficiencyLinearRegression.txt")
#If you have trouble saving the file you can instead use the following line
#ResultsFromFile<-results
#------------------------------------------------------------------------------------------------------------
# Preparation for plots
#-----------------------------------------------------------------------------------------------------------
LinRegtwo<-ResultsFromFile$varperpay0stwo
pay0s<-ResultsFromFile$pay0s
c<-ResultsFromFile$pAhat
betaLinReg<-LinRegtwo[3,length(pay0s)]/LinRegtwo[3,]
cLinReg<-matrix(ResultsFromFile$pay0s,length(ResultsFromFile$pay0s),1)
#------------------------------------------------------------------------------------------------------------
# Chosing your cost functions (three different cost functions)
#------------------------------------------------------------------------------------------------------------
cost1<-0
cost2<-1
RAC1<-((c*cost2+(1-c)*cost1)/cost2)
cost1<-1/3
cost2<-1
RAC2<-((c*cost2+(1-c)*cost1)/cost2)
cost1<-0.5
cost2<-1
RAC3<-((c*cost2+(1-c)*cost1)/cost2)
#----------------------------------------------------------------------------------------
# Plots efficiency hat(beta)
#----------------------------------------------------------------------------------------
op<-par(mfrow=c(2,2))
# Efficiency
op<-par(mfrow=c(1,2))
eff<-matrix(c(betaLinReg),length(cLinReg),1)
matplot(cLinReg,eff,type="l",col=1,xlab=expression(paste('P(J=2| BMI<30)')),ylab="Efficiency")
#abline(reg)
lines(c(0,1),c(0,1))
title('Efficiency of regression parameter')
# Cost adjusted efficiency
ul<-max(c(betaLinReg/t(RAC1),betaLinReg/t(RAC2),betaLinReg/t(RAC3)))+0.1
eff<-matrix(c(betaLinReg/t(RAC1),betaLinReg/t(RAC2),betaLinReg/t(RAC3)),length(c),3)
matplot(cLinReg,eff,type="l",col=1,xlim=c(0,1),ylim=c(0,ul),xlab=expression(paste('P(J=2| BMI<30)')),ylab="Cost adjusted efficiency")
lines(c(0,1),c(1,1))
title('CE of regression parameter')