require(spdep)
# Function to perform permutation test for significant
# component patterns (univariate y)
# ----------------------------------------------------
get.K.univariate <- function(y=y1, Vmat=V, Lambda=lambda, Plot=FALSE,
                             R=999){
  obs <- cor(Vmat, y)^2
  sol <- rep(NA, R)
  n <- length(y)
  for(r in 1:R){
    test <- cor(Vmat, sample(y))^2
    sol[r] <- max(test)
  }
  K <- c(1:ncol(Vmat))[obs > quantile(sol, 0.95)]
  if(Plot==TRUE){
    barplot(as.vector(obs),
            col=as.numeric(is.element(c(1:(n-1)), K)))
  }
  return(K)
}
##################################################
nsim <- 1000
N <- 30
pos <- 1:N
#########################
# Similarity Matrices #
#########################
# Define 3 similarity matrix options for SCR
nb <- cell2nb(N,1)
listw <- nb2listw(nb, style="W")
W <- listw2mat(listw)
W = (W + t(W))/2
W1 <- W
W0 <- 1/(abs(row(W) - col(W)))
W2 <- W0
diag(W2) <- 0
W0 <- 0.9^(abs(row(W) - col(W)))
W3 <- W0
diag(W3) <- 0
# Subtract row and column means, add back global mean
# to weight matrices.
# Perform eigendecomposition of centered weight
# matrices to obtain 'spatial components'
# Find the eigenvector in the direction of 1_n and
# remove it from the eigenvector matrix
Amat <- diag(N) - 1/N*(matrix(1, N, N))
W1 <- Amat %*% W1 %*% Amat
res1 <- eigen(W1, symmetric = TRUE)
V1 <- res1$vectors
eq1 <- apply(as.matrix(res1$values/max(abs(res1$values))),
             1, function(x) identical(all.equal(x, 0), TRUE))
V1 <- V1[,eq1==FALSE]
W2 <- Amat %*% W2 %*% Amat
res2 <- eigen(W2, symmetric = TRUE)
V2 <- res2$vectors
eq2 <- apply(as.matrix(res2$values/max(abs(res2$values))),
             1, function(x) identical(all.equal(x, 0), TRUE))
V2 <- V2[,eq2==FALSE]
W3 <- Amat %*% W3 %*% Amat
res3 <- eigen(W3, symmetric = TRUE)
V3 <- res3$vectors
eq3 <- apply(as.matrix(res3$values/max(abs(res3$values))),
             1, function(x) identical(all.equal(x, 0), TRUE))
V3 <- V3[,eq3==FALSE]
#########################
# PredOption:
# ----------
# 1 = Included predictor of interest with fixed
# spatial component
# 2 = Included predictor of interest with spatial
# auto-correlation
# ErrOption:
# ----------
# 0 = No spatial auto-correlation
# 1 = Missing predictor with fixed spatial component
# 2 = Spatial auto-correlation in errors
# 3 = Location directly contributes to outcome
Beta1Vec <- c(0, 1)
coefs0.means <- coefsSCR1.means <- coefsSCR2.means <- coefsSCR3.means <-
  coefsLoc.means <- array(0, dim=c(2,4,2), dimnames=c('PredictorType',
                                                      'AutocorrelationType', 'BetaValue'))
coefs0.sds <- coefsSCR1.sds <- coefsSCR2.sds <- coefsSCR3.sds <-
  coefsLoc.sds <- array(0, dim=c(2,4,2), dimnames=c('PredictorType',
                                                    'AutocorrelationType', 'BetaValue'))
rejRates0 <- rejRatesSCR1 <- rejRatesSCR2 <- rejRatesSCR3 <- rejRatesLoc
<- array(0, dim=c(2,4,2), dimnames=c('PredictorType',
                                     'AutocorrelationType', 'BetaValue'))
CIcov0 <- CIcovSCR1 <- CIcovSCR2 <- CIcovSCR3 <- CIcovLoc <- array(0,
                                                                   dim=c(2,4,2), dimnames=c('PredictorType', 'AutocorrelationType',
                                                                                            'BetaValue'))
nKvals1 <- nKvals2 <- nKvals3 <- array(0, dim=c(2,4,2,nsim),
                                       dimnames=c('PredictorType', 'AutocorrelationType', 'BetaValue'))
pvalTot0 <- rep(0, nsim)
for(pp in 1:2){
  for(ee in 1:4){
    for(bb in 1:2){
      PredOption <- pp
      ErrOption <- ee-1
      Beta <- c(Beta1Vec[bb], 0)
      coefs0 <- matrix(0, ncol=3, nrow=nsim)
      coefsSCR1 <- coefsSCR2 <- coefsSCR3 <- matrix(0, ncol=3,
                                                    nrow=nsim)
      coefsLoc <- matrix(0, ncol=4, nrow=nsim)
      stder0 <- matrix(0, ncol=3, nrow=nsim)
      stderSCR1 <- stderSCR2 <- stderSCR3 <- matrix(0, ncol=3,
                                                    nrow=nsim)
      stderLoc <- matrix(0, ncol=4, nrow=nsim)
      pvals0 <- matrix(0, ncol=3, nrow=nsim)
      pvalsSCR1 <- pvalsSCR2 <- pvalsSCR3 <- matrix(0, ncol=3,
                                                    nrow=nsim)
      pvalsLoc <- matrix(0, ncol=4, nrow=nsim)
      nK1 <- nK2 <- nK3 <- rep(0, nsim)
      Kvecs1 <- Kvecs2 <- Kvecs3 <- matrix(0, ncol=N,
                                           nrow=nsim)
      for(i in 1:nsim){
        if(PredOption == 1){
          X1 <- rnorm(N, 10*pos/N, 2)
        }else if(PredOption == 2){
          X1 <- rnorm(N)
          cor1 <- matrix(0, N, N)
          cor1 <- 0.75^(abs(row(cor1) - col(cor1)))
          X1 <- as.vector(X1 %*% chol(cor1))
        }
        X2 <- rnorm(N)
        if(ErrOption == 0){
          y0 <- as.vector(1 + X1*Beta[1] + X2*Beta[2]
                          + rnorm(N))
        }else if(ErrOption == 1){
          X3 <- rnorm(N, 5*pos/N, 2)
          Beta3 <- 1
          y0 <- as.vector(1 + X1*Beta[1] + X2*Beta[2]
                          +
                            X3*Beta3 + rnorm(N))
        }else if(ErrOption == 2){
          Err.cov <- matrix(0, N, N)
          diag(Err.cov) <- 1
          Err.cov <- 0.75^(abs(row(Err.cov) -
                                 col(Err.cov)))
          Errs <- rnorm(N) %*% chol(Err.cov)
          y0 <- as.vector(1 + X1*Beta[1] + X2*Beta[2]
                          + Errs)
        }else if(ErrOption == 3){
          y0 <- as.vector(1 + X1*Beta[1] + X2*Beta[2]
                          +
                            1/100*(pos - N/3)^2 + rnorm(N))
        }
        mod0 <- lm(y0 ~ X1 + X2)
        sum0 <- summary(mod0)
        coefs0[i,] <- mod0$coef
        stder0[i,] <- sum0$coef[,2]
        pvals0[i,] <- sum0$coef[,4]
        pvalTot0[i] <- 1 - pf(sum0$fstat[1],
                              sum0$fstat[2], sum0$fstat[3])
        X0 <- cbind(X1, X2)
        X0 <- scale(X0, center=T, scale=F)
        y0 <- scale(y0, center=T, scale=F)
        
# Perform permutation test to test whether the correlation
# between residuals and the 'spatial components'
# is signficant at level 0.05
        K1 <- get.K.univariate(y=residuals(lm(y0 ~ X0)),
                               Vmat=V1,
                               Lambda=1, Plot=F, R=999)
        K2 <- get.K.univariate(y=residuals(lm(y0 ~ X0)),
                               Vmat=V2,
                               Lambda=1, Plot=F, R=999)
        K3 <- get.K.univariate(y=residuals(lm(y0 ~ X0)),
                               Vmat=V3,
                               Lambda=1, Plot=F, R=999)
# If there are no signficant correlations withspatial
# components, the SCR model is just the original
# model (note that since y0 and X0 are centered,
# there will be no intercept term).
        if(length(K1) == 0){

Kvecs1[i,] <- 0
modSCR1 <- lm(y0 ~ X0)
sumSCR1 <- summary(modSCR1)
        }else{
# If there are significant correlations with spatial
# components, we project y0 and X0 onto the remaining
# components (equivalent to taking theresiduals of
# y0 and X0 after subracting the projections
# of y0 and X0 on the significant components) and
# fit a linear regression with these projected variables.
y0.proj1 <- as.vector(t(y0) %*% V1[,-K1] %*%
                        t(V1[,-K1]))
X0.proj1 <- t(t(X0) %*% V1[,-K1] %*% t(V1[,-
                                            K1]))
modSCR1 <- lm(y0.proj1 ~ X0.proj1)
sumSCR1 <- summary(modSCR1)
# Store the index of the components that are
# significantly correlated with the residuals.
Kvecs1[i,] <- 0
Kvecs1[i,K1] <- 1
        }
if(length(K2) == 0){
  Kvecs2[i,] <- 0
  modSCR2 <- lm(y0 ~ X0)
  sumSCR2 <- summary(modSCR2)
}else{
  y0.proj2 <- as.vector(t(y0) %*% V2[,-K2] %*%
                          t(V2[,-K2]))
  X0.proj2 <- t(t(X0) %*% V2[,-K2] %*% t(V2[,-
                                              K2]))
  modSCR2 <- lm(y0.proj2 ~ X0.proj2)
  sumSCR2 <- summary(modSCR2)
  Kvecs2[i,] <- 0
  Kvecs2[i,K2] <- 1
}
if(length(K3) == 0){
  Kvecs3[i,] <- 0
  modSCR3 <- lm(y0 ~ X0)
  sumSCR3 <- summary(modSCR3)
}else{
  y0.proj3 <- as.vector(t(y0) %*% V3[,-K3] %*%
                          t(V3[,-K3]))
  X0.proj3 <- t(t(X0) %*% V3[,-K3] %*% t(V3[,-
                                              K3]))
  modSCR3 <- lm(y0.proj3 ~ X0.proj3)
  sumSCR3 <- summary(modSCR3)
  Kvecs3[i,] <- 0
  Kvecs3[i,K3] <- 1
}
# Count how many components are significantly correlated
# with the residuals for each simulated data set.
nK1[i] <- length(K1)
nK2[i] <- length(K2)
nK3[i] <- length(K3)
coefsSCR1[i,] <- modSCR1$coef
stderSCR1[i,] <- sumSCR1$coef[,2]
pvalsSCR1[i,] <- sumSCR1$coef[,4]
coefsSCR2[i,] <- modSCR2$coef
stderSCR2[i,] <- sumSCR2$coef[,2]
pvalsSCR2[i,] <- sumSCR2$coef[,4]
coefsSCR3[i,] <- modSCR3$coef
stderSCR3[i,] <- sumSCR3$coef[,2]
pvalsSCR3[i,] <- sumSCR3$coef[,4]
# Fit a model including location as a predictor.
# This will address the issues with some types of
# auto-correlation in the response, but not others.
modLoc <- lm(y0 ~ X0 + pos)
sumLoc <- summary(modLoc)
coefsLoc[i,] <- modLoc$coef
stderLoc[i,] <- sumLoc$coef[,2]
pvalsLoc[i,] <- sumLoc$coef[,4]
# Progress reporting
if(i %% 50 == 0) print(i)
      }
nKvals1[pp,ee,bb,] <- nK1
nKvals2[pp,ee,bb,] <- nK2
nKvals3[pp,ee,bb,] <- nK3
coefs0.means[pp,ee,bb] <- apply(coefs0, 2, mean)[2]
coefsSCR1.means[pp,ee,bb] <- apply(coefsSCR1, 2,
                                   mean)[2]
coefsSCR2.means[pp,ee,bb] <- apply(coefsSCR2, 2,
                                   mean)[2]
coefsSCR3.means[pp,ee,bb] <- apply(coefsSCR3, 2,
                                   mean)[2]
coefsLoc.means[pp,ee,bb] <- apply(coefsLoc, 2, mean)[2]
coefs0.sds[pp,ee,bb] <- apply(coefs0, 2, sd)[2]
coefsSCR1.sds[pp,ee,bb] <- apply(coefsSCR1, 2, sd)[2]
coefsSCR2.sds[pp,ee,bb] <- apply(coefsSCR2, 2, sd)[2]
coefsSCR3.sds[pp,ee,bb] <- apply(coefsSCR3, 2, sd)[2]
coefsLoc.sds[pp,ee,bb] <- apply(coefsLoc, 2, sd)[2]
rejRates0[pp,ee,bb] <- apply(pvals0 < 0.05, 2, mean)[2]
rejRatesSCR1[pp,ee,bb] <- apply(pvalsSCR1 < 0.05, 2,
                                mean)[2]
rejRatesSCR2[pp,ee,bb] <- apply(pvalsSCR2 < 0.05, 2,
                                mean)[2]
rejRatesSCR3[pp,ee,bb] <- apply(pvalsSCR3 < 0.05, 2,
                                mean)[2]
rejRatesLoc[pp,ee,bb] <- apply(pvalsLoc < 0.05, 2,
                               mean)[2]
# Confidence interval coverage: should be 95%
# for the second and third elements (first element
# is the predictor, which is messed up by the centering).
CIcov0[pp,ee,bb] <- apply(t(t(coefs0 - qt(0.975, N-
                                            3)*stder0) < c(1, Beta) & t(coefs0 + qt(0.975, N-3)*stder0) > c(1,
                                                                                                            Beta)), 2, mean)[2]
CIcovSCR1[pp,ee,bb] <- apply(t(t(coefsSCR1 - qt(0.975,
                                                N-3)*stderSCR1) < c(1, Beta) & t(coefsSCR1 + qt(0.975, N-3)*stderSCR1) >
                                 c(1, Beta)), 2, mean)[2]
CIcovSCR2[pp,ee,bb] <- apply(t(t(coefsSCR2 - qt(0.975,
                                                N-3)*stderSCR2) < c(1, Beta) & t(coefsSCR2 + qt(0.975, N-3)*stderSCR2) >
                                 c(1, Beta)), 2, mean)[2]
CIcovSCR3[pp,ee,bb] <- apply(t(t(coefsSCR3 - qt(0.975,
                                                N-3)*stderSCR3) < c(1, Beta) & t(coefsSCR3 + qt(0.975, N-3)*stderSCR3) >
                                 c(1, Beta)), 2, mean)[2]
CIcovLoc[pp,ee,bb] <- apply(t(t(coefsLoc - qt(0.975, N-
                                                3)*stderLoc) < c(1, Beta, 0) & t(coefsLoc + qt(0.975, N-3)*stderLoc) >
                                c(1, Beta, 0)), 2, mean)[2]
    }
  }
}
# Creating tables
for(pp in 1:2){
  for(ee in 1:4){
    for(bb in 1:2){
      rslt <- cbind(c(coefs0.means[pp,ee,bb],
                      coefsSCR1.means[pp,ee,bb],
                      coefsSCR2.means[pp,ee,bb],
                      coefsSCR3.means[pp,ee,bb],
                      coefsLoc.means[pp,ee,bb]),
                    c(rejRates0[pp,ee,bb], rejRatesSCR1[pp,ee,bb],
                      rejRatesSCR2[pp,ee,bb], rejRatesSCR3[pp,ee,bb],
                      rejRatesLoc[pp,ee,bb]),
                    c(CIcov0[pp,ee,bb], CIcovSCR1[pp,ee,bb],
                      CIcovSCR2[pp,ee,bb],
                      CIcovSCR3[pp,ee,bb], CIcovLoc[pp,ee,bb]))
      rownames(rslt) <- c("OLS", "SCR W1", "SCR W2", "SCR W3",
                          "OLS + Loc")
      colnames(rslt) <- c("Coef. Est. Mean",
                          "Rejection Rate", "CI Coverage")
      print(paste("pp = ", pp, ", ee = ", ee, ", bb = ", bb,
                  sep=""))
      print(rslt)
    }
  }
}