#####
#####
#####read in the appropriate libraries
library(AICcmodavg)
library(lme4)
library(lattice)
library(mgcv)
library(gamm4)
library(car)
library(cvTools)
library(influence.ME)
library(sp)
library(rgdal)
library(raster)
library(maptools)
library(ape)
library(geosphere)
library(geoR)
library(faraway)
library(PresenceAbsence)
library(MuMIn)

#####
#####
#####read in data
Ponded <- read.csv("BinomialNoRowcropsCombined.csv", header = TRUE)
head(Ponded)
nrow(Ponded)

#####
#####
#####eliminate duplicate values among years
Ponded2 <- subset(Ponded, !duplicated(Ponded$Wetland))
nrow(Ponded2)
colnames(Ponded2)

#obtain mean values for weather variables among years
Wetlands <- unique(Ponded$Wetland)
for(i in 1:nrow(Ponded2)){
	Wetland <- subset(Ponded, Wetland == Wetlands[i])
	Ponded2$espro2[i] <- mean(Wetland$espro2)
	Ponded2$lsrps[i] <- mean(Wetland$lsprs)
	Ponded2$fatmaxm[i] <- mean(Wetland$fatmaxm)
	Ponded2$wprs[i] <- mean(Wetland$wprs)
	Ponded2$wtmaxm[i] <- mean(Wetland$wtmaxm)
	Ponded2$wtmaxsf[i] <- mean(Wetland$wtmaxsf)
} 

#####
#####
#####write new data frames and csv files with assumed climate change scenarios
#Modest Change Scenario
PondedModest <- Ponded2
PondedModest$espro2PostChange <- PondedModest$espro2 + 0.2 * PondedModest$espro2
PondedModest$lsprsPostChange <- PondedModest$lsprs
PondedModest$fatmaxmPostChange <- PondedModest$fatmaxm + 2
PondedModest$wprsPostChange <- PondedModest$wprs + 0.1 * PondedModest$wprs
PondedModest$wtmaxmPostChange <- PondedModest$wtmaxm + 3
PondedModest$wtmaxsfPostChange <- PondedModest$wtmaxsf - 0.1 * PondedModest$wtmaxsf

#Extreme Change Scenario
PondedExtreme <- Ponded2
PondedExtreme$espro2PostChange <- PondedExtreme$espro2 + 0.4 * PondedExtreme$espro2
PondedExtreme$lsprsPostChange <- PondedExtreme$lsprs - 0.2 * PondedExtreme$lsprs
PondedExtreme$fatmaxmPostChange <- PondedExtreme$fatmaxm + 4
PondedExtreme$wprsPostChange <- PondedExtreme$wprs + 0.3 * PondedExtreme$wprs
PondedExtreme$wtmaxmPostChange <- PondedExtreme$wtmaxm + 5
PondedExtreme$wtmaxsfPostChange <- PondedExtreme$wtmaxsf - 0.2 * PondedExtreme$wtmaxsf

write.csv(PondedModest, "BinomialNonRowcropsModestChange3.csv")
write.csv(PondedExtreme, "BinomialNonRowcropsExtremeChange3.csv")

#####
#####
#####check for outliers in predictor variables
boxplot(Ponded$Pitdist)
dotchart(Ponded$Pitdist)
dotchart(Ponded$Pitdist, group = Ponded$Wettype)
dotchart(Ponded$Pitdist, group = Ponded$agtype)
dotchart(Ponded$Pitdist, group = Ponded$Year)
#outliers are present in distance to neartest irrigation reuse pit

boxplot(Ponded$NewfootPA)
dotchart(Ponded$NewfootPA)
dotchart(Ponded$NewfootPA, group = Ponded$Wettype)
dotchart(Ponded$NewfootPA, group = Ponded$agtype)
dotchart(Ponded$NewfootPA, group  = Ponded$Year)
#outliers are present in wetland perimeter-to-area ratio,
#with especially lower values for semi-permanent wetlands

boxplot(Ponded$espro2)
dotchart(Ponded$espro2)
dotchart(Ponded$espro2, group = Ponded$Wettype)
dotchart(Ponded$espro2, group = Ponded$agtype)
dotchart(Ponded$espro2, group = Ponded$Year)

boxplot(Ponded$esprs)
dotchart(Ponded$esprs)
dotchart(Ponded$esprs, group = Ponded$Wettype)
dotchart(Ponded$esprs, group = Ponded$agtype)
dotchart(Ponded$esprs, group = Ponded$Year)

boxplot(Ponded$estmaxm)
dotchart(Ponded$estmaxm)
dotchart(Ponded$estmaxm, group = Ponded$Wettype)
dotchart(Ponded$estmaxm, group = Ponded$agtype)
dotchart(Ponded$estmaxm, group = Ponded$Year)
#a few outliers do exist in mean spring maximum temperature

boxplot(Ponded$estminm)
dotchart(Ponded$estminm)
dotchart(Ponded$estminm, group = Ponded$Wettype)
dotchart(Ponded$estminm, group = Ponded$agtype)
dotchart(Ponded$estminm, group = Ponded$Year)

boxplot(Ponded$esvpdm)
dotchart(Ponded$esvpdm)
dotchart(Ponded$esvpdm, group = Ponded$Wettype)
dotchart(Ponded$esvpdm, group = Ponded$agtype)
dotchart(Ponded$esvpdm, group = Ponded$Year)
#outliers do exist in mean spring vapor pressure deficit

boxplot(Ponded$lspro2)
dotchart(Ponded$lspro2)
dotchart(Ponded$lspro2, group = Ponded$Wettype)
dotchart(Ponded$lspro2, group = Ponded$agtype)
dotchart(Ponded$lspro2, group = Ponded$Year)

boxplot(Ponded$lsprs)
dotchart(Ponded$lsprs)
dotchart(Ponded$lsprs, group = Ponded$Wettpye)
dotchart(Ponded$lsprs, group = Ponded$agtype)
dotchart(Ponded$lsprs, group = Ponded$Year)
#outliers do exist in summer precipitation sum

boxplot(Ponded$lstmaxm)
dotchart(Ponded$lstmaxm)
dotchart(Ponded$lstmaxm, group = Ponded$Wettype)
dotchart(Ponded$lstmaxm, group = Ponded$agtype)
dotchart(Ponded$lstmaxm, group = Ponded$Year)

boxplot(Ponded$lsvpdm)
dotchart(Ponded$lsvpdm)
dotchart(Ponded$lsvpdm, group = Ponded$Wettype)
dotchart(Ponded$lsvpdm, group = Ponded$agtype)
dotchart(Ponded$lsvpdm, group = Ponded$Year)

boxplot(Ponded$fapro1)
dotchart(Ponded$fapro1)
dotchart(Ponded$fapro1, group = Ponded$Wettype)
dotchart(Ponded$fapro1, group = Ponded$agtype)
dotchart(Ponded$fapro1, group = Ponded$Year)
#several outliers exist in the number of major autumn precipitation events

boxplot(Ponded$faprs)
dotchart(Ponded$faprs)
dotchart(Ponded$faprs, group = Ponded$Wettype)
dotchart(Ponded$faprs, group = Ponded$agtype)
dotchart(Ponded$faprs, group = Ponded$Year)
#outliers do exist in fall precipitation sum

boxplot(Ponded$fatmaxm)
dotchart(Ponded$fatmaxm)
dotchart(Ponded$fatmaxm, group = Ponded$Wettype)
dotchart(Ponded$fatmaxm, group = Ponded$agtype)
dotchart(Ponded$fatmaxm, group = Ponded$Year)

boxplot(Ponded$fatminm)
dotchart(Ponded$fatminm)
dotchart(Ponded$fatminm, group = Ponded$Wettype)
dotchart(Ponded$fatminm, group = Ponded$agtype)
dotchart(Ponded$fatminm, group = Ponded$Year)

boxplot(Ponded$favpdm)
dotchart(Ponded$favpdm)
dotchart(Ponded$favpdm, group = Ponded$Wettype)
dotchart(Ponded$favpdm, group = Ponded$agtype)
dotchart(Ponded$favpdm, group = Ponded$Year)

boxplot(Ponded$wpro1)
dotchart(Ponded$wpro1)
dotchart(Ponded$wpro1, group = Ponded$Wettype)
dotchart(Ponded$wpro1, group = Ponded$agtype)
dotchart(Ponded$wpro1, group = Ponded$Year)
#several outliers exist in the number of major winter precipitation events

boxplot(Ponded$wprs)
dotchart(Ponded$wprs)
dotchart(Ponded$wprs, group = Ponded$Wettype)
dotchart(Ponded$wprs, group = Ponded$agtype)
dotchart(Ponded$wprs, group = Ponded$Year)
#a single outlier exists in winter precipitation sum

boxplot(Ponded$wtmaxm)
dotchart(Ponded$wtmaxm)
dotchart(Ponded$wtmaxm, group = Ponded$Wettype)
dotchart(Ponded$wtmaxm, group = Ponded$agtype)
dotchart(Ponded$wtmaxm, group = Ponded$Year)

boxplot(Ponded$wtmaxsf)
dotchart(Ponded$wtmaxsf)
dotchart(Ponded$wtmaxsf, group = Ponded$Wettype)
dotchart(Ponded$wtmaxsf, group = Ponded$agtype)
dotchart(Ponded$wtmaxsf, group = Ponded$Year)

boxplot(Ponded$wtminm)
dotchart(Ponded$wtminm)
dotchart(Ponded$wtminm, group = Ponded$Wettype)
dotchart(Ponded$wtminm, group = Ponded$agtype)
dotchart(Ponded$wtminm, group = Ponded$Year)

boxplot(Ponded$wtminsf)
dotchart(Ponded$wtminsf)
dotchart(Ponded$wtminsf, group = Ponded$Wettype)
dotchart(Ponded$wtminsf, group = Ponded$agtype)
dotchart(Ponded$wtminsf, group = Ponded$Year)

boxplot(Ponded$ytminaf)
dotchart(Ponded$ytmaxaf)
dotchart(Ponded$ytmaxaf, group = Ponded$Wettype)
dotchart(Ponded$ytmaxaf, group = Ponded$agtype)
dotchart(Ponded$ytmaxaf, group = Ponded$Year)

boxplot(Ponded$ytminsf)
dotchart(Ponded$ytminsf)
dotchart(Ponded$ytminsf, group = Ponded$Wettype)
dotchart(Ponded$ytminsf, group = Ponded$agtype)
dotchart(Ponded$ytminsf, group = Ponded$Year)

#####
#####
#####log transform predictor variables with outliers and no zero values
Ponded$log.Pitdist <- log(Ponded$Pitdist)
Ponded$log.NewfootPA <- log(Ponded$NewfootPA)
Ponded$log.estmaxm <- log(Ponded$estmaxm)
Ponded$log.esvpdm <- log(Ponded$esvpdm)
Ponded$log.lsprs <- log(Ponded$lsprs)
#Ponded$log.fapro1 <- log(Ponded$fapro1 + 0.5)
Ponded$log.faprs <- log(Ponded$faprs)
#Ponded$log.wpro1 <- log(Ponded$wpro1 + 0.5)
Ponded$log.wprs <- log(Ponded$wprs)

#####
#####
#####scale all predictor variables, including those that have been log-transformed
Ponded$scaled.log.Pitdist <- scale(Ponded$log.Pitdist, center = TRUE, scale = TRUE)
Ponded$scaled.log.NewfootPA <- scale(Ponded$log.NewfootPA, center = TRUE, scale = TRUE)
Ponded$scaled.espro2 <- scale(Ponded$espro2, center = TRUE, scale = TRUE)
Ponded$scaled.esprs <- scale(Ponded$esprs, center = TRUE, scale = TRUE)
Ponded$scaled.log.estmaxm <- scale(Ponded$log.estmaxm, center = TRUE, scale = TRUE)
Ponded$scaled.estminm <- scale(Ponded$estminm, center = TRUE, scale = TRUE)
Ponded$scaled.log.esvpdm <- scale(Ponded$log.esvpdm, center = TRUE, scale = TRUE)
Ponded$scaled.lspro2 <- scale(Ponded$lspro2, center = TRUE, scale = TRUE)
Ponded$scaled.log.lsprs <- scale(Ponded$log.lsprs, center = TRUE, scale = TRUE)
Ponded$scaled.lstmaxm <- scale(Ponded$lstmaxm, center = TRUE, scale = TRUE)
Ponded$scaled.lsvpdm <- scale(Ponded$lsvpdm, center = TRUE, scale = TRUE)
Ponded$scaled.fapro1 <- scale(Ponded$fapro1, center = TRUE, scale = TRUE)
Ponded$scaled.log.faprs <- scale(Ponded$log.faprs, center = TRUE, scale = TRUE)
Ponded$scaled.fatmaxm <- scale(Ponded$fatmaxm, center = TRUE, scale = TRUE)
Ponded$scaled.fatminm <- scale(Ponded$fatminm, center = TRUE, scale = TRUE)
Ponded$scaled.favpdm <- scale(Ponded$favpdm, center = TRUE, scale = TRUE)
Ponded$scaled.wpro1 <- scale(Ponded$wpro1, center = TRUE, scale = TRUE)
Ponded$scaled.log.wprs <- scale(Ponded$log.wprs, center = TRUE, scale = TRUE)
Ponded$scaled.wtmaxm <- scale(Ponded$wtmaxm, center = TRUE, scale = TRUE)
Ponded$scaled.wtmaxsf <- scale(Ponded$wtmaxsf, center = TRUE, scale = TRUE)
Ponded$scaled.wtminm <- scale(Ponded$wtminm, center = TRUE, scale = TRUE)
Ponded$scaled.wtminsf <- scale(Ponded$wtminsf, center = TRUE, scale = TRUE)
Ponded$scaled.wvpdm <- scale(Ponded$wvpdm, center = TRUE, scale = TRUE)
Ponded$scaled.ytminaf <- scale(Ponded$ytminaf, center = TRUE, scale = TRUE)
Ponded$scaled.ytminsf <- scale(Ponded$ytminsf, center = TRUE, scale = TRUE)

#####
#####
#####read in nominal variables as factors
Ponded$Wetland = factor(Ponded$Wetland)
Ponded$Wettype = factor(Ponded$Wettype)
Ponded$Year = factor(Ponded$Year)

#####
#####
#####insert script for setting up fancy pairplots
panel.hist <- function(x, ...)
{
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(usr[1:2], 0, 1.5) )
    h <- hist(x, plot = FALSE)
    breaks <- h$breaks; nB <- length(breaks)
    y <- h$counts; y <- y/max(y)
    rect(breaks[-nB], 0, breaks[-1], y, col="cyan", ...)
}

panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...)
{
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(0, 1, 0, 1))
    r <- abs(cor(x, y))
    txt <- format(c(r, 0.123456789), digits=digits)[1]
    txt <- paste(prefix, txt, sep="")
    if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
    text(0.5, 0.5, txt, cex = cex.cor * r)
}

#construct pairplots to look at correlations between variables in same season
pairs(~ Presence + scaled.log.Pitdist + scaled.log.NewfootPA, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded)
pairs(~ Presence + scaled.espro2 + scaled.esprs + scaled.log.estmaxm + scaled.estminm + scaled.log.esvpdm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded)
pairs(~ Presence + scaled.espro2 + scaled.log.estmaxm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded)
pairs(~ Presence + scaled.lspro2 + scaled.log.lsprs + scaled.lstmaxm + scaled.lsvpdm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded)
pairs(~ Presence + scaled.log.lsprs + scaled.lstmaxm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded)
pairs(~ Presence + scaled.fapro1 + scaled.log.faprs + scaled.fatmaxm + scaled.fatminm + scaled.favpdm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded)
pairs(~ Presence + scaled.log.faprs + scaled.fatmaxm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded)
pairs(~ Presence + scaled.wpro1 + scaled.log.wprs + scaled.wtmaxm + scaled.wtmaxsf + scaled.wtminm + scaled.wtminsf + scaled.wvpdm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded)
pairs(~ Presence + scaled.wtmaxm + scaled.log.wprs, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded)
pairs(~ Presence + scaled.ytminaf + scaled.ytminsf, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded)
pairs(~ Presence + scaled.espro2 + scaled.log.estmaxm + scaled.log.lsprs + scaled.lstmaxm + scaled.log.faprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm + scaled.ytminaf, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded)
pairs(~ Presence + scaled.espro2 + scaled.log.lsprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded)

#####
#####
#####compare alternative random effects structures and get delta AICc scores and weights
random.set <- list((Presence ~ Wettype + scaled.log.NewfootPA + scaled.espro2 + scaled.log.lsprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm + (1|Wetland) + (1|Year)),
(Presence ~ Wettype + scaled.log.NewfootPA + scaled.espro2 + scaled.log.lsprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm + (1|Wetland)),
(Presence ~ Wettype + scaled.log.NewfootPA + scaled.espro2 + scaled.log.lsprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm + (1|Year)))

test.random <- lapply(random.set, glmer, family = "binomial", REML = TRUE, control = glmerControl(optimizer="bobyqa"), data = Ponded)
random.table <- aictab(test.random, modnames = as.character(random.set), sort = TRUE, second.ord = TRUE)
random.table

top.random <- glmer(Presence ~ Wettype + scaled.log.NewfootPA + scaled.espro2 + scaled.log.lsprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm + (1|Wetland), family = "binomial", REML = TRUE, control = glmerControl(optimizer="bobyqa"), data = Ponded)
summary(top.random)

#####
#####
#####use model residuals to test assumptions of normality, heterogeneity and independence
hist(resid(top.random))
plot(fitted(top.random), resid(top.random))
plot(resid(top.random) ~ Ponded$Wettype)
plot(resid(top.random) ~ Ponded$Year)
plot(resid(top.random) ~ Ponded$scaled.log.NewfootPA)
plot(resid(top.random) ~ Ponded$scaled.espro2)
plot(resid(top.random) ~ Ponded$scaled.log.lsprs)
plot(resid(top.random) ~ Ponded$scaled.fatmaxm)
plot(resid(top.random) ~ Ponded$scaled.log.wprs)
plot(resid(top.random) ~ Ponded$scaled.wtmaxm)

#####
#####
#####use backwards selection to identify the optimum fixed effects structure
Backwards1 <- drop1(top.random, test = "Chisq", trace = TRUE, REML = FALSE)
Backwards1

top.random.drop1 <- glmer(Presence ~ Wettype + scaled.log.NewfootPA + scaled.log.lsprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm + (1|Wetland), family = "binomial", REML = TRUE, control = glmerControl(optimizer="bobyqa"), data = Ponded)
summary(top.random.drop1)

Backwards2 <- drop1(top.random.drop1, test = "Chisq", trace = TRUE, REML = FALSE)
Backwards2

top.model <- glmer(Presence ~ Wettype + scaled.log.NewfootPA + scaled.log.lsprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm + (1|Wetland), family = "binomial", REML = TRUE, control = glmerControl(optimizer="bobyqa"), data = Ponded)
summary(top.model)

#####
#####
#####obtain r-squared values for the best supported model
r.squaredGLMM(top.model)

#####
#####
#####use model residuals to test assumptions of normality, heterogeneity and independence
hist(resid(top.model))
plot(fitted(top.model), resid(top.model))
plot(resid(top.model) ~ Ponded$Wettype)
plot(resid(top.model) ~ Ponded$Year)
plot(resid(top.model) ~ Ponded$scaled.log.NewfootPA)
plot(resid(top.model) ~ Ponded$scaled.log.lsprs)
plot(resid(top.model) ~ Ponded$scaled.fatmaxm)
plot(resid(top.model) ~ Ponded$scaled.log.wprs)
plot(resid(top.model) ~ Ponded$scaled.wtmaxm)

#####
#####
#####check for patterns between model residuals and previously-excluded variables
plot(resid(top.model) ~ Ponded$scaled.log.estmaxm)
plot(resid(top.model) ~ Ponded$scaled.estminm)
plot(resid(top.model) ~ Ponded$scaled.log.esvpdm)
plot(resid(top.model) ~ Ponded$scaled.lspro2)
plot(resid(top.model) ~ Ponded$scaled.lstmaxm)
plot(resid(top.model) ~ Ponded$scaled.lsvpdm)
plot(resid(top.model) ~ Ponded$scaled.fapro1)
plot(resid(top.model) ~ Ponded$scaled.log.faprs)
plot(resid(top.model) ~ Ponded$scaled.fatminm)
plot(resid(top.model) ~ Ponded$scaled.favpdm)
plot(resid(top.model) ~ Ponded$scaled.wpro1)
plot(resid(top.model) ~ Ponded$scaled.wtmaxsf)
plot(resid(top.model) ~ Ponded$scaled.wtminm)
plot(resid(top.model) ~ Ponded$scaled.wtminsf)
plot(resid(top.model) ~ Ponded$scaled.wvpdm)
plot(resid(top.model) ~ Ponded$scaled.ytminsf)

#####
#####
#####experiment with computing confidence intervals
#manually
Coefficients <- coef(summary(top.model))
Lowers <- Coefficients[,1] - (1.96 * Coefficients[,2])
Uppers <- Coefficients[,1] + (1.96 * Coefficients[,2])
ConfInts <- rbind(Coefficients[,1], Lowers, Uppers)
ConfInts

#####
#####
#####make predictions with the best supported generalized linear mixed model
#with only fixed effects considered
Ponded$Pred.Presence.GLMM.FixedOnly <- NA
Ponded$Pred.Presence.GLMM.FixedOnly <- predict(top.model, re.form = ~0, type = 'link')
Ponded$Pred.Presence.GLMM.FixedOnly <- ilogit(Ponded$Pred.Presence.GLMM.FixedOnly)

#test predictions with basic auc test
retain <- c("Wetland", "Presence", "Pred.Presence.GLMM.FixedOnly")
Presence.GLMM.Validation <- Ponded[retain]
Presence.GLMM.AUC <- auc(Presence.GLMM.Validation, st.dev = TRUE, which.model = 1, na.rm = FALSE)
Presence.GLMM.AUC

#generate auc/roc plot
Presence.GLMM.AUC.Plot <- auc.roc.plot(Presence.GLMM.Validation, threshold = 101, find.auc = TRUE, xlab = "False positive rate", ylab = "True positive rate", main = NULL, col = "black", opt.thresholds = FALSE, legend.text = "Inundation model")

#create optimum threshold plot for predictive model
Presence.GLMM.Optimal.Threshold <- error.threshold.plot(Presence.GLMM.Validation, threshold = 101, which.model = 1, na.rm = FALSE, xlab = "Threshold value", ylab = "Accuracy measure", main = "Determining wetland inundation threshold value", plot.it = TRUE, opt.thresholds = TRUE, vert.lines = TRUE)
Presence.GLMM.Optimal.Threshold

#####
#####
#####obtain ponding statistics
#Predicted inundation events
Predicted <- subset(Ponded, Pred.Presence.GLMM.FixedOnly >= 0.73)
nrow(Ponded)
nrow(Predicted)
Predicted2007 <- subset(Predicted, Year == 2007)
nrow(Predicted2007)
Predicted2008 <- subset(Predicted, Year == 2008)
nrow(Predicted2008)
Predicted2009 <- subset(Predicted, Year == 2009)
nrow(Predicted2009)

#observed inundation events
Observed <- subset(Ponded, Presence == 1)
nrow(Observed)
Observed2007 <- subset(Observed, Year == 2007)
nrow(Observed2007)
Observed2008 <- subset(Observed, Year == 2008)
nrow(Observed2008)
Observed2009 <- subset(Observed, Year == 2009)
nrow(Observed2009)

#total yearly records
Year2007 <- subset(Ponded, Year == 2007)
Year2008 <- subset(Ponded, Year == 2008)
Year2009 <- subset(Ponded, Year == 2009)

#observed proportion ponded
nrow(Observed2007)/nrow(Year2007)
nrow(Observed2008)/nrow(Year2008)
nrow(Observed2009)/nrow(Year2009)

#predicted proportion ponded
nrow(Predicted2007)/nrow(Year2007)
nrow(Predicted2008)/nrow(Year2008)
nrow(Predicted2009)/nrow(Year2009)

#predicted inundation likelihoods
mean(Year2007$Pred.Presence.GLMM.FixedOnly)
mean(Year2008$Pred.Presence.GLMM.FixedOnly)
mean(Year2009$Pred.Presence.GLMM.FixedOnly)

####
#####
#####make predictions with the best supported generalized linear mixed model
#with fixed and all random effects considered
Ponded$Pred.Presence.GLMM.FixedAndRandom <- predict(top.model, re.form = ~(1|Wetland), type = 'link')
Ponded$Pred.Presence.GLMM.FixedAndRandom <- ilogit(Ponded$Pred.Presence.GLMM.FixedAndRandom)

#test predictions with basic auc test
retain <- c("Wetland", "Presence", "Pred.Presence.GLMM.FixedAndRandom")
Presence.GLMM.Validation <- Ponded[retain]
Presence.GLMM.AUC <- auc(Presence.GLMM.Validation, st.dev = TRUE, which.model = 1, na.rm = FALSE)
Presence.GLMM.AUC

#generate auc/roc plot
Presence.GLMM.AUC.Plot <- auc.roc.plot(Presence.GLMM.Validation, threshold = 101, find.auc = TRUE, xlab = "False positive rate", ylab = "True positive rate", main = NULL, col = "black", opt.thresholds = FALSE, legend.text = "Inundation model")

#create optimum threshold plot for predictive model
Presence.GLMM.Optimal.Threshold <- error.threshold.plot(Presence.GLMM.Validation, threshold = 101, which.model = 1, na.rm = FALSE, xlab = "Threshold value", ylab = "Accuracy measure", main = "Determining wetland inundation threshold value", plot.it = TRUE, opt.thresholds = TRUE, vert.lines = TRUE)
Presence.GLMM.Optimal.Threshold

#####
#####
#####transform wetland type to binomial
Ponded$SemiPerm <- Ponded$Wettype == 'Semi-permanent'
Ponded$Seasonal <- Ponded$Wettype == 'Seasonal'
Ponded$Temporary <- Ponded$Wettype == 'Temporary'

for(i in 1:length(Ponded$SemiPerm)){
	Ponded$SemiPerm[i] <- recode(Ponded$SemiPerm[i], "c('TRUE') = 1; ('FALSE') = 0")
}
for(i in 1:length(Ponded$Seasonal)){
	Ponded$Seasonal[i] <- recode(Ponded$Seasonal[i], "c('TRUE') = 1; ('FALSE') = 0")
}
for(i in 1:length(Ponded$Temporary)){
	Ponded$Temporary[i] <- recode(Ponded$Temporary[i], "c('TRUE') = 1; ('FALSE') = 0")
}

#####
#####
#####Make manual predictions with best-supported generalized linear mixed model
#with only fixed effects included
Ponded$Pred.Presence.ManualFixedOnly <- NA
for(i in 1:nrow(Ponded)){
	Ponded$Pred.Presence.ManualFixedOnly[[i]] = 1.96446+ 
	+ 0.71939 * Ponded$SemiPerm[[i]]+ 
	- 0.87143 * Ponded$Temporary[[i]]+
	- ((1.76324 * Ponded$log.NewfootPA[[i]] - 1.76324 * -4.20119) / 0.62257)+
	+ ((0.42099 * Ponded$log.lsprs[[i]] - 0.42099 * 5.38196) / 0.31275)+
	- ((0.60281 * Ponded$fatmaxm[[i]] - 0.60281 * 16.08833) / 1.12164)+
	+ ((0.16996 * Ponded$log.wprs[[i]] - 0.16996 * 4.23183) / 0.79371)+
	- ((0.92181 * Ponded$wtmaxm[[i]] - 0.92181 * 6.53900) / 1.42316)
}

Ponded$Pred.Presence.ManualFixedOnly <- ilogit(Ponded$Pred.Presence.ManualFixedOnly)

#test predictions with basic auc test
retain <- c("Wetland", "Presence", "Pred.Presence.ManualFixedOnly")
Presence.GLMM.Validation <- Ponded[retain]
Presence.GLMM.AUC <- auc(Presence.GLMM.Validation, st.dev = TRUE, which.model = 1, na.rm = FALSE)
Presence.GLMM.AUC

#generate auc/roc plot
Presence.GLMM.AUC.Plot <- auc.roc.plot(Presence.GLMM.Validation, threshold = 101, find.auc = TRUE, xlab = "False positive rate", ylab = "True positive rate", main = NULL, col = "black", opt.thresholds = FALSE, legend.text = "Inundation model")

#create optimum threshold plot for predictive model
Presence.GLMM.Optimal.Threshold <- error.threshold.plot(Presence.GLMM.Validation, threshold = 101, which.model = 1, na.rm = FALSE, xlab = "Threshold value", ylab = "Accuracy measure", main = "Determining wetland inundation threshold value", plot.it = TRUE, opt.thresholds = TRUE, vert.lines = TRUE)
Presence.GLMM.Optimal.Threshold

#####
#####
#####use k-fold cross validation function from Chris Jorgensen for validation
#with k = 10
source("cv.glmmFunction_v2.0.R")
Presence.GLMM.CrossValidation <- cv.glmm(Ponded, top.model, k = 10, cvData = TRUE)
str(Presence.GLMM.CrossValidation)
Presence.GLMM.CrossValidation
head(Presence.GLMM.CrossValidation$PredObsData)
summary(Presence.GLMM.CrossValidation)
Presence.GLMM.CrossValidation$delta
Presence.GLMM.CrossValidation$CV

#get auc score and produce ROC plot
ID <- rep(1:nrow(Ponded))
Presence.GLMM.CrossValidation.Stats <- Presence.GLMM.CrossValidation$PredObsData
Presence.GLMM.CrossValidation.Stats <- cbind(Presence.GLMM.CrossValidation.Stats, ID)
Presence.GLMM.CrossValidation.Stats <- Presence.GLMM.CrossValidation.Stats[, c(3,2,1)]
Presence.GLMM.CrossValidation.AUC <- auc(Presence.GLMM.CrossValidation.Stats, st.dev = TRUE, which.model = 1, na.rm = FALSE)
Presence.GLMM.CrossValidation.AUC
Presence.GLMM.CrossValidation.ROC <- auc.roc.plot(Presence.GLMM.CrossValidation.Stats, threshold = 101, find.auc = TRUE, xlab = "False positive rate", ylab = "True positive rate", main = NULL, col = "black", opt.thresholds = FALSE, legend.text = "Inundation model")

#create optimum threshold plot for predictive model
Presence.GLMM.Optimal.Threshold <- error.threshold.plot(Presence.GLMM.CrossValidation.Stats, threshold = 101, which.model = 1, na.rm = FALSE, xlab = "Threshold value", ylab = "Accuracy measure", plot.it = TRUE, opt.thresholds = TRUE, vert.lines = TRUE)
Presence.GLMM.Optimal.Threshold

#read in values for all wetlands roc plot
AllWetlandsROC <- read.csv("Presence.GLMM.CrossValidation.ROC.Values.All.csv", header = TRUE)
TruePositiveAll <- AllWetlandsROC$sensitivity
FalsePositiveAll <- 1 - AllWetlandsROC$specificity

#get values for non-rowcrop wetlands roc plot
Presence.GLMM.CrossValidation.ROC.Values.NonRowcrop <- roc.plot.calculate(Presence.GLMM.CrossValidation.Stats, threshold = 101, which.model = 1, na.rm = FALSE)
head(Presence.GLMM.CrossValidation.ROC.Values.NonRowcrop)
write.csv(Presence.GLMM.CrossValidation.ROC.Values.NonRowcrop, "Presence.GLMM.CrossValidation.ROC.Values.NonRowcrop.csv")
TruePositiveNonRowcrop <- Presence.GLMM.CrossValidation.ROC.Values.NonRowcrop$sensitivity
FalsePositiveNonRowcrop <- 1 - Presence.GLMM.CrossValidation.ROC.Values.NonRowcrop$specificity
DummyLine <- data.frame(X = c(0,1))
DummyLine$Y <- c(0,1)

#construct plot
plot(TruePositiveNonRowcrop ~ FalsePositiveNonRowcrop, frame.plot = TRUE, xlab = "False positive rate", ylab = "True positive rate", axes = FALSE, type = 'l', lty = 2, lwd = 3, cex.lab = 1.25)
lines(DummyLine$X, DummyLine$Y, lty = 3, lwd = 1)
lines(TruePositiveAll ~ FalsePositiveAll, lty = 1, lwd = 2)
axis(side = 1, at = c(0.0, 0.2, 0.4, 0.6, 0.8, 1.0), tck = 0.02)
axis(side = 2, at = c(0.0, 0.2, 0.4, 0.6, 0.8, 1.0), tck = 0.02)
legend(0.25, 0.3, c("Non-rowcrop-embedded (AUC = 0.82)", "All (AUC = 0.79)", "Random chance (AUC = 0.50)"), cex = 0.95, lty = c(2, 1, 3), lwd = c(3, 2, 1), bty = 'n')

#####
#####
#####obtain random effects estimates for individual wetlands
RandomEffects <- ranef(top.model)
WetlandsList <- sort(unique(Ponded$Wetland), descending = FALSE)
RandomEffectsBinomialNonRowcrops <- data.frame(Wetland = WetlandsList)
RandomEffectsBinomialNonRowcrops$RandomEffect <- RandomEffects$Wetland[1:nrow(RandomEffects$Wetland),]
head(RandomEffectsBinomialNonRowcrops)
tail(RandomEffectsBinomialNonRowcrops)

#write to csv file for making predictions
#write.csv(RandomEffectsBinomialNonRowcrops, "RandomEffectsBinomialNonRowcrops.csv")

#merge random effects with existing dataset
Ponded <- merge(Ponded, RandomEffectsBinomialNonRowcrops, by.x = "Wetland", by.y = "Wetland")

#####
#####
#####Make manual predictions with best-supported generalized linear mixed model
#with fixed and random effects included
Ponded$Pred.Presence.ManualFixedAndRandom <- NA
for(i in 1:nrow(Ponded)){
	Ponded$Pred.Presence.ManualFixedAndRandom[[i]] = Ponded$RandomEffect[[i]]+ 
	+ 0.71939 * Ponded$SemiPerm[[i]]+ 
	- 0.87143 * Ponded$Temporary[[i]]+
	- ((1.76324 * Ponded$log.NewfootPA[[i]] - 1.76324 * -4.20119) / 0.62257)+
	+ ((0.42099 * Ponded$log.lsprs[[i]] - 0.42099 * 5.38196) / 0.31275)+
	- ((0.60281 * Ponded$fatmaxm[[i]] - 0.60281 * 16.08833) / 1.12164)+
	+ ((0.16996 * Ponded$log.wprs[[i]] - 0.16996 * 4.23183) / 0.79371)+
	- ((0.92181 * Ponded$wtmaxm[[i]] - 0.92181 * 6.53900) / 1.42316)
}

Ponded$Pred.Presence.ManualFixedAndRandom <- ilogit(Ponded$Pred.Presence.ManualFixedAndRandom)

#test predictions with basic auc test
retain <- c("Wetland", "Presence", "Pred.Presence.ManualFixedAndRandom")
Presence.GLMM.Validation <- Ponded[retain]
Presence.GLMM.AUC <- auc(Presence.GLMM.Validation, st.dev = TRUE, which.model = 1, na.rm = FALSE)
Presence.GLMM.AUC

#generate auc/roc plot
Presence.GLMM.AUC.Plot <- auc.roc.plot(Presence.GLMM.Validation, threshold = 101, find.auc = TRUE, xlab = "False positive rate", ylab = "True positive rate", main = NULL, col = "black", opt.thresholds = FALSE, legend.text = "Inundation model")

#create optimum threshold plot for predictive model
Presence.GLMM.Optimal.Threshold <- error.threshold.plot(Presence.GLMM.Validation, threshold = 101, which.model = 1, na.rm = FALSE, xlab = "Threshold value", ylab = "Accuracy measure", main = "Determining wetland inundation threshold value", plot.it = TRUE, opt.thresholds = TRUE, vert.lines = TRUE)
Presence.GLMM.Optimal.Threshold

#####
#####
#####determine mean predicted and observed ponding events in testing sets
Predicted <- Presence.GLMM.CrossValidation$PredObsData[1]
Observed <- Presence.GLMM.CrossValidation$PredObsData[2]
mean(Predicted$pred)
mean(Observed$obs)

#with k = 20
Presence.GLMM.CrossValidation2 <- cv.glmm(Ponded, top.model, k = 20, cvData = TRUE)
str(Presence.GLMM.CrossValidation2)
head(Presence.GLMM.CrossValidation2$PredObsData)
summary(Presence.GLMM.CrossValidation2)
Presence.GLMM.CrossValidation2$delta
Presence.GLMM.CrossValidation2$CV

#get auc score and produce ROC plot
ID <- rep(1:nrow(Ponded))
Presence.GLMM.CrossValidation.Stats2 <- Presence.GLMM.CrossValidation2$PredObsData
Presence.GLMM.CrossValidation.Stats2 <- cbind(Presence.GLMM.CrossValidation.Stats2, ID)
Presence.GLMM.CrossValidation.Stats2 <- Presence.GLMM.CrossValidation.Stats2[, c(3,2,1)]
Presence.GLMM.CrossValidation.AUC2 <- auc(Presence.GLMM.CrossValidation2, st.dev = TRUE, which.model = 1, na.rm = FALSE)
Presence.GLMM.CrossValidation.AUC2
Presence.GLMM.CrossValidation.ROC2 <- auc.roc.plot(Presence.GLMM.CrossValidation2, threshold = 101, find.auc = TRUE, xlab = "False positive rate", ylab = "True positive rate", main = NULL, col = "black", opt.thresholds = FALSE, legend.text = "Inundation model")

#with k = 100
Presence.GLMM.CrossValidation3 <- cv.glmm(Ponded, top.model, k = 100, cvData = TRUE)
str(Presence.GLMM.CrossValidation3)
head(Presence.GLMM.CrossValidation3$PredObsData)
summary(Presence.GLMM.CrossValidation3)
Presence.GLMM.CrossValidation3$delta
Presence.GLMM.CrossValidation3$CV

#get auc score and produce ROC plot
ID <- rep(1:nrow(Ponded))
Presence.GLMM.CrossValidation.Stats3 <- Presence.GLMM.CrossValidation3$PredObsData
Presence.GLMM.CrossValidation.Stats3 <- cbind(Presence.GLMM.CrossValidation.Stats3, ID)
Presence.GLMM.CrossValidation.Stats3 <- Presence.GLMM.CrossValidation.Stats3[, c(3,2,1)]
Presence.GLMM.CrossValidation.AUC3 <- auc(Presence.GLMM.CrossValidation3, st.dev = TRUE, which.model = 1, na.rm = FALSE)
Presence.GLMM.CrossValidation.AUC3
Presence.GLMM.CrossValidation.ROC3 <- auc.roc.plot(Presence.GLMM.CrossValidation3, threshold = 101, find.auc = TRUE, xlab = "False positive rate", ylab = "True positive rate", main = NULL, col = "black", opt.thresholds = FALSE, legend.text = "Inundation model")

#####
#####
#####detect and address non-linear relationships between the residuals of the
#best-supported linear mixed model and predictors, in order to gauge if it is
#appropriate to incorporate smoothing terms via generalized additive mixed modeling
#not subset by any nominal variable
xyplot(resid(top.model) ~ Ponded$scaled.log.NewfootPA, type = 'smooth')
xyplot(resid(top.model) ~ Ponded$scaled.log.lsprs, type = 'smooth')
xyplot(resid(top.model) ~ Ponded$scaled.fatmaxm, type = 'smooth')
xyplot(resid(top.model) ~ Ponded$scaled.wtmaxm, type = 'smooth')

#subset by wetland type
xyplot(resid(top.model) ~ Ponded$scaled.log.NewfootPA|Ponded$Wettype, type = 'smooth')
xyplot(resid(top.model) ~ Ponded$scaled.log.lsprs|Ponded$Wettype, type = 'smooth')
xyplot(resid(top.model) ~ Ponded$scaled.fatmaxm|Ponded$Wettype, type = 'smooth')
xyplot(resid(top.model) ~ Ponded$scaled.wtmaxm|Ponded$Wettype, type = 'smooth')

#subset by year
xyplot(resid(top.model) ~ Ponded$scaled.log.NewfootPA|Ponded$Year, type = 'smooth')
xyplot(resid(top.model) ~ Ponded$scaled.log.lsprs|Ponded$Year, type = 'smooth')
xyplot(resid(top.model) ~ Ponded$scaled.fatmaxm|Ponded$Year, type = 'smooth')
xyplot(resid(top.model) ~ Ponded$scaled.wtmaxm|Ponded$Year, type = 'smooth')