-
Notifications
You must be signed in to change notification settings - Fork 0
/
simulate_binomial_data.R
128 lines (97 loc) · 3.66 KB
/
simulate_binomial_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
################################################################################
############################### Simulation study ###############################
################################################################################
# This is an adaptation of the script by Zhi Zhao ([email protected])
# functions to simulate two heterogeneous data sources with bivariate response
# used in Zhao and Zucknick (2020).
library(Matrix)
library(MASS)
library(mvtnorm)
covariance_matrix <- function(rho, p, b, num_nonpen) {
# Generate covariance matrix
Ap1 <- matrix(rep(rho, (p[1] / b) ^ 2), nrow = p[1] / b)
diag(Ap1) <- rep(1, p[1] / b)
Ap2 <- matrix(rep(rho, (p[2] / b) ^ 2), nrow = p[2] / b)
diag(Ap2) <- rep(1, p[2] / b)
Bp12 <- matrix(rep(rho, p[1] / b * p[2] / b), nrow = p[1] / b)
Bp21 <- matrix(rep(rho, p[1] / b * p[2] / b), nrow = p[2] / b)
Xsigma1 <- Ap1
Xsigma2 <- Ap2
Xsigma12 <- Bp12
Xsigma21 <- Bp21
for (i in 2:b) {
Xsigma1 <- bdiag(Xsigma1, Ap1)
Xsigma12 <- bdiag(Xsigma12, Bp12)
Xsigma2 <- bdiag(Xsigma2, Ap2)
Xsigma21 <- bdiag(Xsigma21, Bp21)
}
Xsigma <-
rbind(cbind(Xsigma1, Xsigma12), cbind(Xsigma21, Xsigma2))
if(num_nonpen > 0){
nonpenSigma <-
matrix(rep(rho, num_nonpen^2), nrow = num_nonpen)
diag(nonpenSigma) <- rep(1, num_nonpen)
Xsigma <- bdiag(nonpenSigma, Xsigma)
}
return(as.matrix(Xsigma))
}
simulation_data <-
function(p = c(100, 100),
p_relevant_set = c(10, 10),
beta_set = c(0.2, 0.6),
num_nonpen = 0,
beta_nonpen = 0.7,
n = 100,
rho = .4,
prop = .5,
dicotomise_second_layer = FALSE,
diagonal_covariance = FALSE) {
b = 10
if (is.na(p[2]) |
is.na(p_relevant_set[2]) | is.na(beta_set[2])) {
cat("Please specify bidimensional p, p_relevant_set, and beta_set
vectors.\n")
}
Y <- rbinom(n, 1, prop)
if (diagonal_covariance) {
Xsigma <- diag(p[1] + p[2] + num_nonpen)
} else{
Xsigma <- covariance_matrix(rho, p, b, num_nonpen)
}
X <- rmvnorm(n,
mean = rep(0, num_nonpen + p[1] + p[2]),
sigma = as.matrix(Xsigma))
X0 <- X[, 1:num_nonpen]
X1 <- X[, (1 + num_nonpen):(p[1] + num_nonpen)]
X2 <- X[, (p[1] + 1 + num_nonpen):(p[1] + p[2] + num_nonpen)]
relevant_covariates <- c()
if (p_relevant_set[1] > 0 & beta_set[1] != 0) {
ind.rel <- sample(ncol(X1), p_relevant_set[1])
relevant_covariates <- 1:p_relevant_set[1] + num_nonpen
X1 <- cbind(X1[, ind.rel], X1[, -ind.rel])
X1[Y == 1, 1:p_relevant_set[1]] <-
X1[Y == 1, 1:p_relevant_set[1]] + beta_set[1]
}
if (p_relevant_set[2] > 0 & beta_set[2] != 0) {
ind.rel <- sample(ncol(X2), p_relevant_set[2])
relevant_covariates <- c(relevant_covariates,
1:p_relevant_set[2] + num_nonpen + p[1])
X2 <- cbind(X2[, ind.rel], X2[, -ind.rel])
X2[Y == 1, 1:p_relevant_set[2]] <-
X2[Y == 1, 1:p_relevant_set[2]] + beta_set[2]
}
if (dicotomise_second_layer) {
X2 <- data.matrix(X2 > 0) + 0 # Make this binary
}
X = cbind(X1, X2)
if (num_nonpen > 0) {
relevant_covariates <- c(relevant_covariates, 1:num_nonpen)
X0[Y == 1,] <- X0[Y == 1,] + beta_nonpen
X <- cbind(X0, X)
}
return(list(Y = Y, # Dependent variable
X = X, # Independent variables
p = p, # Number of variables in each layer
relevant_covariates = sort(relevant_covariates)
))
}