require(spdep) # Function to perform permutation test for significant # component patterns (univariate y) # ---------------------------------------------------- get.K.univariate <- function(y=y1, Vmat=V, Lambda=lambda, Plot=FALSE, R=999){ obs <- cor(Vmat, y)^2 sol <- rep(NA, R) n <- length(y) for(r in 1:R){ test <- cor(Vmat, sample(y))^2 sol[r] <- max(test) } K <- c(1:ncol(Vmat))[obs > quantile(sol, 0.95)] if(Plot==TRUE){ barplot(as.vector(obs), col=as.numeric(is.element(c(1:(n-1)), K))) } return(K) } ################################################## nsim <- 1000 N <- 30 pos <- 1:N ######################### # Similarity Matrices # ######################### # Define 3 similarity matrix options for SCR nb <- cell2nb(N,1) listw <- nb2listw(nb, style="W") W <- listw2mat(listw) W = (W + t(W))/2 W1 <- W W0 <- 1/(abs(row(W) - col(W))) W2 <- W0 diag(W2) <- 0 W0 <- 0.9^(abs(row(W) - col(W))) W3 <- W0 diag(W3) <- 0 # Subtract row and column means, add back global mean # to weight matrices. # Perform eigendecomposition of centered weight # matrices to obtain 'spatial components' # Find the eigenvector in the direction of 1_n and # remove it from the eigenvector matrix Amat <- diag(N) - 1/N*(matrix(1, N, N)) W1 <- Amat %*% W1 %*% Amat res1 <- eigen(W1, symmetric = TRUE) V1 <- res1$vectors eq1 <- apply(as.matrix(res1$values/max(abs(res1$values))), 1, function(x) identical(all.equal(x, 0), TRUE)) V1 <- V1[,eq1==FALSE] W2 <- Amat %*% W2 %*% Amat res2 <- eigen(W2, symmetric = TRUE) V2 <- res2$vectors eq2 <- apply(as.matrix(res2$values/max(abs(res2$values))), 1, function(x) identical(all.equal(x, 0), TRUE)) V2 <- V2[,eq2==FALSE] W3 <- Amat %*% W3 %*% Amat res3 <- eigen(W3, symmetric = TRUE) V3 <- res3$vectors eq3 <- apply(as.matrix(res3$values/max(abs(res3$values))), 1, function(x) identical(all.equal(x, 0), TRUE)) V3 <- V3[,eq3==FALSE] ######################### # PredOption: # ---------- # 1 = Included predictor of interest with fixed # spatial component # 2 = Included predictor of interest with spatial # auto-correlation # ErrOption: # ---------- # 0 = No spatial auto-correlation # 1 = Missing predictor with fixed spatial component # 2 = Spatial auto-correlation in errors # 3 = Location directly contributes to outcome Beta1Vec <- c(0, 1) coefs0.means <- coefsSCR1.means <- coefsSCR2.means <- coefsSCR3.means <- coefsLoc.means <- array(0, dim=c(2,4,2), dimnames=c('PredictorType', 'AutocorrelationType', 'BetaValue')) coefs0.sds <- coefsSCR1.sds <- coefsSCR2.sds <- coefsSCR3.sds <- coefsLoc.sds <- array(0, dim=c(2,4,2), dimnames=c('PredictorType', 'AutocorrelationType', 'BetaValue')) rejRates0 <- rejRatesSCR1 <- rejRatesSCR2 <- rejRatesSCR3 <- rejRatesLoc <- array(0, dim=c(2,4,2), dimnames=c('PredictorType', 'AutocorrelationType', 'BetaValue')) CIcov0 <- CIcovSCR1 <- CIcovSCR2 <- CIcovSCR3 <- CIcovLoc <- array(0, dim=c(2,4,2), dimnames=c('PredictorType', 'AutocorrelationType', 'BetaValue')) nKvals1 <- nKvals2 <- nKvals3 <- array(0, dim=c(2,4,2,nsim), dimnames=c('PredictorType', 'AutocorrelationType', 'BetaValue')) pvalTot0 <- rep(0, nsim) for(pp in 1:2){ for(ee in 1:4){ for(bb in 1:2){ PredOption <- pp ErrOption <- ee-1 Beta <- c(Beta1Vec[bb], 0) coefs0 <- matrix(0, ncol=3, nrow=nsim) coefsSCR1 <- coefsSCR2 <- coefsSCR3 <- matrix(0, ncol=3, nrow=nsim) coefsLoc <- matrix(0, ncol=4, nrow=nsim) stder0 <- matrix(0, ncol=3, nrow=nsim) stderSCR1 <- stderSCR2 <- stderSCR3 <- matrix(0, ncol=3, nrow=nsim) stderLoc <- matrix(0, ncol=4, nrow=nsim) pvals0 <- matrix(0, ncol=3, nrow=nsim) pvalsSCR1 <- pvalsSCR2 <- pvalsSCR3 <- matrix(0, ncol=3, nrow=nsim) pvalsLoc <- matrix(0, ncol=4, nrow=nsim) nK1 <- nK2 <- nK3 <- rep(0, nsim) Kvecs1 <- Kvecs2 <- Kvecs3 <- matrix(0, ncol=N, nrow=nsim) for(i in 1:nsim){ if(PredOption == 1){ X1 <- rnorm(N, 10*pos/N, 2) }else if(PredOption == 2){ X1 <- rnorm(N) cor1 <- matrix(0, N, N) cor1 <- 0.75^(abs(row(cor1) - col(cor1))) X1 <- as.vector(X1 %*% chol(cor1)) } X2 <- rnorm(N) if(ErrOption == 0){ y0 <- as.vector(1 + X1*Beta[1] + X2*Beta[2] + rnorm(N)) }else if(ErrOption == 1){ X3 <- rnorm(N, 5*pos/N, 2) Beta3 <- 1 y0 <- as.vector(1 + X1*Beta[1] + X2*Beta[2] + X3*Beta3 + rnorm(N)) }else if(ErrOption == 2){ Err.cov <- matrix(0, N, N) diag(Err.cov) <- 1 Err.cov <- 0.75^(abs(row(Err.cov) - col(Err.cov))) Errs <- rnorm(N) %*% chol(Err.cov) y0 <- as.vector(1 + X1*Beta[1] + X2*Beta[2] + Errs) }else if(ErrOption == 3){ y0 <- as.vector(1 + X1*Beta[1] + X2*Beta[2] + 1/100*(pos - N/3)^2 + rnorm(N)) } mod0 <- lm(y0 ~ X1 + X2) sum0 <- summary(mod0) coefs0[i,] <- mod0$coef stder0[i,] <- sum0$coef[,2] pvals0[i,] <- sum0$coef[,4] pvalTot0[i] <- 1 - pf(sum0$fstat[1], sum0$fstat[2], sum0$fstat[3]) X0 <- cbind(X1, X2) X0 <- scale(X0, center=T, scale=F) y0 <- scale(y0, center=T, scale=F) # Perform permutation test to test whether the correlation # between residuals and the 'spatial components' # is signficant at level 0.05 K1 <- get.K.univariate(y=residuals(lm(y0 ~ X0)), Vmat=V1, Lambda=1, Plot=F, R=999) K2 <- get.K.univariate(y=residuals(lm(y0 ~ X0)), Vmat=V2, Lambda=1, Plot=F, R=999) K3 <- get.K.univariate(y=residuals(lm(y0 ~ X0)), Vmat=V3, Lambda=1, Plot=F, R=999) # If there are no signficant correlations withspatial # components, the SCR model is just the original # model (note that since y0 and X0 are centered, # there will be no intercept term). if(length(K1) == 0){ Kvecs1[i,] <- 0 modSCR1 <- lm(y0 ~ X0) sumSCR1 <- summary(modSCR1) }else{ # If there are significant correlations with spatial # components, we project y0 and X0 onto the remaining # components (equivalent to taking theresiduals of # y0 and X0 after subracting the projections # of y0 and X0 on the significant components) and # fit a linear regression with these projected variables. y0.proj1 <- as.vector(t(y0) %*% V1[,-K1] %*% t(V1[,-K1])) X0.proj1 <- t(t(X0) %*% V1[,-K1] %*% t(V1[,- K1])) modSCR1 <- lm(y0.proj1 ~ X0.proj1) sumSCR1 <- summary(modSCR1) # Store the index of the components that are # significantly correlated with the residuals. Kvecs1[i,] <- 0 Kvecs1[i,K1] <- 1 } if(length(K2) == 0){ Kvecs2[i,] <- 0 modSCR2 <- lm(y0 ~ X0) sumSCR2 <- summary(modSCR2) }else{ y0.proj2 <- as.vector(t(y0) %*% V2[,-K2] %*% t(V2[,-K2])) X0.proj2 <- t(t(X0) %*% V2[,-K2] %*% t(V2[,- K2])) modSCR2 <- lm(y0.proj2 ~ X0.proj2) sumSCR2 <- summary(modSCR2) Kvecs2[i,] <- 0 Kvecs2[i,K2] <- 1 } if(length(K3) == 0){ Kvecs3[i,] <- 0 modSCR3 <- lm(y0 ~ X0) sumSCR3 <- summary(modSCR3) }else{ y0.proj3 <- as.vector(t(y0) %*% V3[,-K3] %*% t(V3[,-K3])) X0.proj3 <- t(t(X0) %*% V3[,-K3] %*% t(V3[,- K3])) modSCR3 <- lm(y0.proj3 ~ X0.proj3) sumSCR3 <- summary(modSCR3) Kvecs3[i,] <- 0 Kvecs3[i,K3] <- 1 } # Count how many components are significantly correlated # with the residuals for each simulated data set. nK1[i] <- length(K1) nK2[i] <- length(K2) nK3[i] <- length(K3) coefsSCR1[i,] <- modSCR1$coef stderSCR1[i,] <- sumSCR1$coef[,2] pvalsSCR1[i,] <- sumSCR1$coef[,4] coefsSCR2[i,] <- modSCR2$coef stderSCR2[i,] <- sumSCR2$coef[,2] pvalsSCR2[i,] <- sumSCR2$coef[,4] coefsSCR3[i,] <- modSCR3$coef stderSCR3[i,] <- sumSCR3$coef[,2] pvalsSCR3[i,] <- sumSCR3$coef[,4] # Fit a model including location as a predictor. # This will address the issues with some types of # auto-correlation in the response, but not others. modLoc <- lm(y0 ~ X0 + pos) sumLoc <- summary(modLoc) coefsLoc[i,] <- modLoc$coef stderLoc[i,] <- sumLoc$coef[,2] pvalsLoc[i,] <- sumLoc$coef[,4] # Progress reporting if(i %% 50 == 0) print(i) } nKvals1[pp,ee,bb,] <- nK1 nKvals2[pp,ee,bb,] <- nK2 nKvals3[pp,ee,bb,] <- nK3 coefs0.means[pp,ee,bb] <- apply(coefs0, 2, mean)[2] coefsSCR1.means[pp,ee,bb] <- apply(coefsSCR1, 2, mean)[2] coefsSCR2.means[pp,ee,bb] <- apply(coefsSCR2, 2, mean)[2] coefsSCR3.means[pp,ee,bb] <- apply(coefsSCR3, 2, mean)[2] coefsLoc.means[pp,ee,bb] <- apply(coefsLoc, 2, mean)[2] coefs0.sds[pp,ee,bb] <- apply(coefs0, 2, sd)[2] coefsSCR1.sds[pp,ee,bb] <- apply(coefsSCR1, 2, sd)[2] coefsSCR2.sds[pp,ee,bb] <- apply(coefsSCR2, 2, sd)[2] coefsSCR3.sds[pp,ee,bb] <- apply(coefsSCR3, 2, sd)[2] coefsLoc.sds[pp,ee,bb] <- apply(coefsLoc, 2, sd)[2] rejRates0[pp,ee,bb] <- apply(pvals0 < 0.05, 2, mean)[2] rejRatesSCR1[pp,ee,bb] <- apply(pvalsSCR1 < 0.05, 2, mean)[2] rejRatesSCR2[pp,ee,bb] <- apply(pvalsSCR2 < 0.05, 2, mean)[2] rejRatesSCR3[pp,ee,bb] <- apply(pvalsSCR3 < 0.05, 2, mean)[2] rejRatesLoc[pp,ee,bb] <- apply(pvalsLoc < 0.05, 2, mean)[2] # Confidence interval coverage: should be 95% # for the second and third elements (first element # is the predictor, which is messed up by the centering). CIcov0[pp,ee,bb] <- apply(t(t(coefs0 - qt(0.975, N- 3)*stder0) < c(1, Beta) & t(coefs0 + qt(0.975, N-3)*stder0) > c(1, Beta)), 2, mean)[2] CIcovSCR1[pp,ee,bb] <- apply(t(t(coefsSCR1 - qt(0.975, N-3)*stderSCR1) < c(1, Beta) & t(coefsSCR1 + qt(0.975, N-3)*stderSCR1) > c(1, Beta)), 2, mean)[2] CIcovSCR2[pp,ee,bb] <- apply(t(t(coefsSCR2 - qt(0.975, N-3)*stderSCR2) < c(1, Beta) & t(coefsSCR2 + qt(0.975, N-3)*stderSCR2) > c(1, Beta)), 2, mean)[2] CIcovSCR3[pp,ee,bb] <- apply(t(t(coefsSCR3 - qt(0.975, N-3)*stderSCR3) < c(1, Beta) & t(coefsSCR3 + qt(0.975, N-3)*stderSCR3) > c(1, Beta)), 2, mean)[2] CIcovLoc[pp,ee,bb] <- apply(t(t(coefsLoc - qt(0.975, N- 3)*stderLoc) < c(1, Beta, 0) & t(coefsLoc + qt(0.975, N-3)*stderLoc) > c(1, Beta, 0)), 2, mean)[2] } } } # Creating tables for(pp in 1:2){ for(ee in 1:4){ for(bb in 1:2){ rslt <- cbind(c(coefs0.means[pp,ee,bb], coefsSCR1.means[pp,ee,bb], coefsSCR2.means[pp,ee,bb], coefsSCR3.means[pp,ee,bb], coefsLoc.means[pp,ee,bb]), c(rejRates0[pp,ee,bb], rejRatesSCR1[pp,ee,bb], rejRatesSCR2[pp,ee,bb], rejRatesSCR3[pp,ee,bb], rejRatesLoc[pp,ee,bb]), c(CIcov0[pp,ee,bb], CIcovSCR1[pp,ee,bb], CIcovSCR2[pp,ee,bb], CIcovSCR3[pp,ee,bb], CIcovLoc[pp,ee,bb])) rownames(rslt) <- c("OLS", "SCR W1", "SCR W2", "SCR W3", "OLS + Loc") colnames(rslt) <- c("Coef. Est. Mean", "Rejection Rate", "CI Coverage") print(paste("pp = ", pp, ", ee = ", ee, ", bb = ", bb, sep="")) print(rslt) } } }