##### ##### #####read in the appropriate libraries library(AICcmodavg) library(lme4) library(lattice) library(mgcv) library(gamm4) library(car) library(cvTools) library(influence.ME) library(sp) library(rgdal) library(raster) library(maptools) library(ape) library(geosphere) library(geoR) library(faraway) library(PresenceAbsence) library(MuMIn) ##### ##### #####read in data Ponded <- read.csv("BinomialNoRowcropsCombined.csv", header = TRUE) head(Ponded) nrow(Ponded) ##### ##### #####eliminate duplicate values among years Ponded2 <- subset(Ponded, !duplicated(Ponded$Wetland)) nrow(Ponded2) colnames(Ponded2) #obtain mean values for weather variables among years Wetlands <- unique(Ponded$Wetland) for(i in 1:nrow(Ponded2)){ Wetland <- subset(Ponded, Wetland == Wetlands[i]) Ponded2$espro2[i] <- mean(Wetland$espro2) Ponded2$lsrps[i] <- mean(Wetland$lsprs) Ponded2$fatmaxm[i] <- mean(Wetland$fatmaxm) Ponded2$wprs[i] <- mean(Wetland$wprs) Ponded2$wtmaxm[i] <- mean(Wetland$wtmaxm) Ponded2$wtmaxsf[i] <- mean(Wetland$wtmaxsf) } ##### ##### #####write new data frames and csv files with assumed climate change scenarios #Modest Change Scenario PondedModest <- Ponded2 PondedModest$espro2PostChange <- PondedModest$espro2 + 0.2 * PondedModest$espro2 PondedModest$lsprsPostChange <- PondedModest$lsprs PondedModest$fatmaxmPostChange <- PondedModest$fatmaxm + 2 PondedModest$wprsPostChange <- PondedModest$wprs + 0.1 * PondedModest$wprs PondedModest$wtmaxmPostChange <- PondedModest$wtmaxm + 3 PondedModest$wtmaxsfPostChange <- PondedModest$wtmaxsf - 0.1 * PondedModest$wtmaxsf #Extreme Change Scenario PondedExtreme <- Ponded2 PondedExtreme$espro2PostChange <- PondedExtreme$espro2 + 0.4 * PondedExtreme$espro2 PondedExtreme$lsprsPostChange <- PondedExtreme$lsprs - 0.2 * PondedExtreme$lsprs PondedExtreme$fatmaxmPostChange <- PondedExtreme$fatmaxm + 4 PondedExtreme$wprsPostChange <- PondedExtreme$wprs + 0.3 * PondedExtreme$wprs PondedExtreme$wtmaxmPostChange <- PondedExtreme$wtmaxm + 5 PondedExtreme$wtmaxsfPostChange <- PondedExtreme$wtmaxsf - 0.2 * PondedExtreme$wtmaxsf write.csv(PondedModest, "BinomialNonRowcropsModestChange3.csv") write.csv(PondedExtreme, "BinomialNonRowcropsExtremeChange3.csv") ##### ##### #####check for outliers in predictor variables boxplot(Ponded$Pitdist) dotchart(Ponded$Pitdist) dotchart(Ponded$Pitdist, group = Ponded$Wettype) dotchart(Ponded$Pitdist, group = Ponded$agtype) dotchart(Ponded$Pitdist, group = Ponded$Year) #outliers are present in distance to neartest irrigation reuse pit boxplot(Ponded$NewfootPA) dotchart(Ponded$NewfootPA) dotchart(Ponded$NewfootPA, group = Ponded$Wettype) dotchart(Ponded$NewfootPA, group = Ponded$agtype) dotchart(Ponded$NewfootPA, group = Ponded$Year) #outliers are present in wetland perimeter-to-area ratio, #with especially lower values for semi-permanent wetlands boxplot(Ponded$espro2) dotchart(Ponded$espro2) dotchart(Ponded$espro2, group = Ponded$Wettype) dotchart(Ponded$espro2, group = Ponded$agtype) dotchart(Ponded$espro2, group = Ponded$Year) boxplot(Ponded$esprs) dotchart(Ponded$esprs) dotchart(Ponded$esprs, group = Ponded$Wettype) dotchart(Ponded$esprs, group = Ponded$agtype) dotchart(Ponded$esprs, group = Ponded$Year) boxplot(Ponded$estmaxm) dotchart(Ponded$estmaxm) dotchart(Ponded$estmaxm, group = Ponded$Wettype) dotchart(Ponded$estmaxm, group = Ponded$agtype) dotchart(Ponded$estmaxm, group = Ponded$Year) #a few outliers do exist in mean spring maximum temperature boxplot(Ponded$estminm) dotchart(Ponded$estminm) dotchart(Ponded$estminm, group = Ponded$Wettype) dotchart(Ponded$estminm, group = Ponded$agtype) dotchart(Ponded$estminm, group = Ponded$Year) boxplot(Ponded$esvpdm) dotchart(Ponded$esvpdm) dotchart(Ponded$esvpdm, group = Ponded$Wettype) dotchart(Ponded$esvpdm, group = Ponded$agtype) dotchart(Ponded$esvpdm, group = Ponded$Year) #outliers do exist in mean spring vapor pressure deficit boxplot(Ponded$lspro2) dotchart(Ponded$lspro2) dotchart(Ponded$lspro2, group = Ponded$Wettype) dotchart(Ponded$lspro2, group = Ponded$agtype) dotchart(Ponded$lspro2, group = Ponded$Year) boxplot(Ponded$lsprs) dotchart(Ponded$lsprs) dotchart(Ponded$lsprs, group = Ponded$Wettpye) dotchart(Ponded$lsprs, group = Ponded$agtype) dotchart(Ponded$lsprs, group = Ponded$Year) #outliers do exist in summer precipitation sum boxplot(Ponded$lstmaxm) dotchart(Ponded$lstmaxm) dotchart(Ponded$lstmaxm, group = Ponded$Wettype) dotchart(Ponded$lstmaxm, group = Ponded$agtype) dotchart(Ponded$lstmaxm, group = Ponded$Year) boxplot(Ponded$lsvpdm) dotchart(Ponded$lsvpdm) dotchart(Ponded$lsvpdm, group = Ponded$Wettype) dotchart(Ponded$lsvpdm, group = Ponded$agtype) dotchart(Ponded$lsvpdm, group = Ponded$Year) boxplot(Ponded$fapro1) dotchart(Ponded$fapro1) dotchart(Ponded$fapro1, group = Ponded$Wettype) dotchart(Ponded$fapro1, group = Ponded$agtype) dotchart(Ponded$fapro1, group = Ponded$Year) #several outliers exist in the number of major autumn precipitation events boxplot(Ponded$faprs) dotchart(Ponded$faprs) dotchart(Ponded$faprs, group = Ponded$Wettype) dotchart(Ponded$faprs, group = Ponded$agtype) dotchart(Ponded$faprs, group = Ponded$Year) #outliers do exist in fall precipitation sum boxplot(Ponded$fatmaxm) dotchart(Ponded$fatmaxm) dotchart(Ponded$fatmaxm, group = Ponded$Wettype) dotchart(Ponded$fatmaxm, group = Ponded$agtype) dotchart(Ponded$fatmaxm, group = Ponded$Year) boxplot(Ponded$fatminm) dotchart(Ponded$fatminm) dotchart(Ponded$fatminm, group = Ponded$Wettype) dotchart(Ponded$fatminm, group = Ponded$agtype) dotchart(Ponded$fatminm, group = Ponded$Year) boxplot(Ponded$favpdm) dotchart(Ponded$favpdm) dotchart(Ponded$favpdm, group = Ponded$Wettype) dotchart(Ponded$favpdm, group = Ponded$agtype) dotchart(Ponded$favpdm, group = Ponded$Year) boxplot(Ponded$wpro1) dotchart(Ponded$wpro1) dotchart(Ponded$wpro1, group = Ponded$Wettype) dotchart(Ponded$wpro1, group = Ponded$agtype) dotchart(Ponded$wpro1, group = Ponded$Year) #several outliers exist in the number of major winter precipitation events boxplot(Ponded$wprs) dotchart(Ponded$wprs) dotchart(Ponded$wprs, group = Ponded$Wettype) dotchart(Ponded$wprs, group = Ponded$agtype) dotchart(Ponded$wprs, group = Ponded$Year) #a single outlier exists in winter precipitation sum boxplot(Ponded$wtmaxm) dotchart(Ponded$wtmaxm) dotchart(Ponded$wtmaxm, group = Ponded$Wettype) dotchart(Ponded$wtmaxm, group = Ponded$agtype) dotchart(Ponded$wtmaxm, group = Ponded$Year) boxplot(Ponded$wtmaxsf) dotchart(Ponded$wtmaxsf) dotchart(Ponded$wtmaxsf, group = Ponded$Wettype) dotchart(Ponded$wtmaxsf, group = Ponded$agtype) dotchart(Ponded$wtmaxsf, group = Ponded$Year) boxplot(Ponded$wtminm) dotchart(Ponded$wtminm) dotchart(Ponded$wtminm, group = Ponded$Wettype) dotchart(Ponded$wtminm, group = Ponded$agtype) dotchart(Ponded$wtminm, group = Ponded$Year) boxplot(Ponded$wtminsf) dotchart(Ponded$wtminsf) dotchart(Ponded$wtminsf, group = Ponded$Wettype) dotchart(Ponded$wtminsf, group = Ponded$agtype) dotchart(Ponded$wtminsf, group = Ponded$Year) boxplot(Ponded$ytminaf) dotchart(Ponded$ytmaxaf) dotchart(Ponded$ytmaxaf, group = Ponded$Wettype) dotchart(Ponded$ytmaxaf, group = Ponded$agtype) dotchart(Ponded$ytmaxaf, group = Ponded$Year) boxplot(Ponded$ytminsf) dotchart(Ponded$ytminsf) dotchart(Ponded$ytminsf, group = Ponded$Wettype) dotchart(Ponded$ytminsf, group = Ponded$agtype) dotchart(Ponded$ytminsf, group = Ponded$Year) ##### ##### #####log transform predictor variables with outliers and no zero values Ponded$log.Pitdist <- log(Ponded$Pitdist) Ponded$log.NewfootPA <- log(Ponded$NewfootPA) Ponded$log.estmaxm <- log(Ponded$estmaxm) Ponded$log.esvpdm <- log(Ponded$esvpdm) Ponded$log.lsprs <- log(Ponded$lsprs) #Ponded$log.fapro1 <- log(Ponded$fapro1 + 0.5) Ponded$log.faprs <- log(Ponded$faprs) #Ponded$log.wpro1 <- log(Ponded$wpro1 + 0.5) Ponded$log.wprs <- log(Ponded$wprs) ##### ##### #####scale all predictor variables, including those that have been log-transformed Ponded$scaled.log.Pitdist <- scale(Ponded$log.Pitdist, center = TRUE, scale = TRUE) Ponded$scaled.log.NewfootPA <- scale(Ponded$log.NewfootPA, center = TRUE, scale = TRUE) Ponded$scaled.espro2 <- scale(Ponded$espro2, center = TRUE, scale = TRUE) Ponded$scaled.esprs <- scale(Ponded$esprs, center = TRUE, scale = TRUE) Ponded$scaled.log.estmaxm <- scale(Ponded$log.estmaxm, center = TRUE, scale = TRUE) Ponded$scaled.estminm <- scale(Ponded$estminm, center = TRUE, scale = TRUE) Ponded$scaled.log.esvpdm <- scale(Ponded$log.esvpdm, center = TRUE, scale = TRUE) Ponded$scaled.lspro2 <- scale(Ponded$lspro2, center = TRUE, scale = TRUE) Ponded$scaled.log.lsprs <- scale(Ponded$log.lsprs, center = TRUE, scale = TRUE) Ponded$scaled.lstmaxm <- scale(Ponded$lstmaxm, center = TRUE, scale = TRUE) Ponded$scaled.lsvpdm <- scale(Ponded$lsvpdm, center = TRUE, scale = TRUE) Ponded$scaled.fapro1 <- scale(Ponded$fapro1, center = TRUE, scale = TRUE) Ponded$scaled.log.faprs <- scale(Ponded$log.faprs, center = TRUE, scale = TRUE) Ponded$scaled.fatmaxm <- scale(Ponded$fatmaxm, center = TRUE, scale = TRUE) Ponded$scaled.fatminm <- scale(Ponded$fatminm, center = TRUE, scale = TRUE) Ponded$scaled.favpdm <- scale(Ponded$favpdm, center = TRUE, scale = TRUE) Ponded$scaled.wpro1 <- scale(Ponded$wpro1, center = TRUE, scale = TRUE) Ponded$scaled.log.wprs <- scale(Ponded$log.wprs, center = TRUE, scale = TRUE) Ponded$scaled.wtmaxm <- scale(Ponded$wtmaxm, center = TRUE, scale = TRUE) Ponded$scaled.wtmaxsf <- scale(Ponded$wtmaxsf, center = TRUE, scale = TRUE) Ponded$scaled.wtminm <- scale(Ponded$wtminm, center = TRUE, scale = TRUE) Ponded$scaled.wtminsf <- scale(Ponded$wtminsf, center = TRUE, scale = TRUE) Ponded$scaled.wvpdm <- scale(Ponded$wvpdm, center = TRUE, scale = TRUE) Ponded$scaled.ytminaf <- scale(Ponded$ytminaf, center = TRUE, scale = TRUE) Ponded$scaled.ytminsf <- scale(Ponded$ytminsf, center = TRUE, scale = TRUE) ##### ##### #####read in nominal variables as factors Ponded$Wetland = factor(Ponded$Wetland) Ponded$Wettype = factor(Ponded$Wettype) Ponded$Year = factor(Ponded$Year) ##### ##### #####insert script for setting up fancy pairplots panel.hist <- function(x, ...) { usr <- par("usr"); on.exit(par(usr)) par(usr = c(usr[1:2], 0, 1.5) ) h <- hist(x, plot = FALSE) breaks <- h$breaks; nB <- length(breaks) y <- h$counts; y <- y/max(y) rect(breaks[-nB], 0, breaks[-1], y, col="cyan", ...) } panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...) { usr <- par("usr"); on.exit(par(usr)) par(usr = c(0, 1, 0, 1)) r <- abs(cor(x, y)) txt <- format(c(r, 0.123456789), digits=digits)[1] txt <- paste(prefix, txt, sep="") if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt) text(0.5, 0.5, txt, cex = cex.cor * r) } #construct pairplots to look at correlations between variables in same season pairs(~ Presence + scaled.log.Pitdist + scaled.log.NewfootPA, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded) pairs(~ Presence + scaled.espro2 + scaled.esprs + scaled.log.estmaxm + scaled.estminm + scaled.log.esvpdm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded) pairs(~ Presence + scaled.espro2 + scaled.log.estmaxm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded) pairs(~ Presence + scaled.lspro2 + scaled.log.lsprs + scaled.lstmaxm + scaled.lsvpdm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded) pairs(~ Presence + scaled.log.lsprs + scaled.lstmaxm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded) pairs(~ Presence + scaled.fapro1 + scaled.log.faprs + scaled.fatmaxm + scaled.fatminm + scaled.favpdm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded) pairs(~ Presence + scaled.log.faprs + scaled.fatmaxm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded) pairs(~ Presence + scaled.wpro1 + scaled.log.wprs + scaled.wtmaxm + scaled.wtmaxsf + scaled.wtminm + scaled.wtminsf + scaled.wvpdm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded) pairs(~ Presence + scaled.wtmaxm + scaled.log.wprs, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded) pairs(~ Presence + scaled.ytminaf + scaled.ytminsf, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded) pairs(~ Presence + scaled.espro2 + scaled.log.estmaxm + scaled.log.lsprs + scaled.lstmaxm + scaled.log.faprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm + scaled.ytminaf, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded) pairs(~ Presence + scaled.espro2 + scaled.log.lsprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm, diag.panel = panel.hist, upper.panel = panel.smooth, lower.panel = panel.cor, data = Ponded) ##### ##### #####compare alternative random effects structures and get delta AICc scores and weights random.set <- list((Presence ~ Wettype + scaled.log.NewfootPA + scaled.espro2 + scaled.log.lsprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm + (1|Wetland) + (1|Year)), (Presence ~ Wettype + scaled.log.NewfootPA + scaled.espro2 + scaled.log.lsprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm + (1|Wetland)), (Presence ~ Wettype + scaled.log.NewfootPA + scaled.espro2 + scaled.log.lsprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm + (1|Year))) test.random <- lapply(random.set, glmer, family = "binomial", REML = TRUE, control = glmerControl(optimizer="bobyqa"), data = Ponded) random.table <- aictab(test.random, modnames = as.character(random.set), sort = TRUE, second.ord = TRUE) random.table top.random <- glmer(Presence ~ Wettype + scaled.log.NewfootPA + scaled.espro2 + scaled.log.lsprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm + (1|Wetland), family = "binomial", REML = TRUE, control = glmerControl(optimizer="bobyqa"), data = Ponded) summary(top.random) ##### ##### #####use model residuals to test assumptions of normality, heterogeneity and independence hist(resid(top.random)) plot(fitted(top.random), resid(top.random)) plot(resid(top.random) ~ Ponded$Wettype) plot(resid(top.random) ~ Ponded$Year) plot(resid(top.random) ~ Ponded$scaled.log.NewfootPA) plot(resid(top.random) ~ Ponded$scaled.espro2) plot(resid(top.random) ~ Ponded$scaled.log.lsprs) plot(resid(top.random) ~ Ponded$scaled.fatmaxm) plot(resid(top.random) ~ Ponded$scaled.log.wprs) plot(resid(top.random) ~ Ponded$scaled.wtmaxm) ##### ##### #####use backwards selection to identify the optimum fixed effects structure Backwards1 <- drop1(top.random, test = "Chisq", trace = TRUE, REML = FALSE) Backwards1 top.random.drop1 <- glmer(Presence ~ Wettype + scaled.log.NewfootPA + scaled.log.lsprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm + (1|Wetland), family = "binomial", REML = TRUE, control = glmerControl(optimizer="bobyqa"), data = Ponded) summary(top.random.drop1) Backwards2 <- drop1(top.random.drop1, test = "Chisq", trace = TRUE, REML = FALSE) Backwards2 top.model <- glmer(Presence ~ Wettype + scaled.log.NewfootPA + scaled.log.lsprs + scaled.fatmaxm + scaled.log.wprs + scaled.wtmaxm + (1|Wetland), family = "binomial", REML = TRUE, control = glmerControl(optimizer="bobyqa"), data = Ponded) summary(top.model) ##### ##### #####obtain r-squared values for the best supported model r.squaredGLMM(top.model) ##### ##### #####use model residuals to test assumptions of normality, heterogeneity and independence hist(resid(top.model)) plot(fitted(top.model), resid(top.model)) plot(resid(top.model) ~ Ponded$Wettype) plot(resid(top.model) ~ Ponded$Year) plot(resid(top.model) ~ Ponded$scaled.log.NewfootPA) plot(resid(top.model) ~ Ponded$scaled.log.lsprs) plot(resid(top.model) ~ Ponded$scaled.fatmaxm) plot(resid(top.model) ~ Ponded$scaled.log.wprs) plot(resid(top.model) ~ Ponded$scaled.wtmaxm) ##### ##### #####check for patterns between model residuals and previously-excluded variables plot(resid(top.model) ~ Ponded$scaled.log.estmaxm) plot(resid(top.model) ~ Ponded$scaled.estminm) plot(resid(top.model) ~ Ponded$scaled.log.esvpdm) plot(resid(top.model) ~ Ponded$scaled.lspro2) plot(resid(top.model) ~ Ponded$scaled.lstmaxm) plot(resid(top.model) ~ Ponded$scaled.lsvpdm) plot(resid(top.model) ~ Ponded$scaled.fapro1) plot(resid(top.model) ~ Ponded$scaled.log.faprs) plot(resid(top.model) ~ Ponded$scaled.fatminm) plot(resid(top.model) ~ Ponded$scaled.favpdm) plot(resid(top.model) ~ Ponded$scaled.wpro1) plot(resid(top.model) ~ Ponded$scaled.wtmaxsf) plot(resid(top.model) ~ Ponded$scaled.wtminm) plot(resid(top.model) ~ Ponded$scaled.wtminsf) plot(resid(top.model) ~ Ponded$scaled.wvpdm) plot(resid(top.model) ~ Ponded$scaled.ytminsf) ##### ##### #####experiment with computing confidence intervals #manually Coefficients <- coef(summary(top.model)) Lowers <- Coefficients[,1] - (1.96 * Coefficients[,2]) Uppers <- Coefficients[,1] + (1.96 * Coefficients[,2]) ConfInts <- rbind(Coefficients[,1], Lowers, Uppers) ConfInts ##### ##### #####make predictions with the best supported generalized linear mixed model #with only fixed effects considered Ponded$Pred.Presence.GLMM.FixedOnly <- NA Ponded$Pred.Presence.GLMM.FixedOnly <- predict(top.model, re.form = ~0, type = 'link') Ponded$Pred.Presence.GLMM.FixedOnly <- ilogit(Ponded$Pred.Presence.GLMM.FixedOnly) #test predictions with basic auc test retain <- c("Wetland", "Presence", "Pred.Presence.GLMM.FixedOnly") Presence.GLMM.Validation <- Ponded[retain] Presence.GLMM.AUC <- auc(Presence.GLMM.Validation, st.dev = TRUE, which.model = 1, na.rm = FALSE) Presence.GLMM.AUC #generate auc/roc plot Presence.GLMM.AUC.Plot <- auc.roc.plot(Presence.GLMM.Validation, threshold = 101, find.auc = TRUE, xlab = "False positive rate", ylab = "True positive rate", main = NULL, col = "black", opt.thresholds = FALSE, legend.text = "Inundation model") #create optimum threshold plot for predictive model Presence.GLMM.Optimal.Threshold <- error.threshold.plot(Presence.GLMM.Validation, threshold = 101, which.model = 1, na.rm = FALSE, xlab = "Threshold value", ylab = "Accuracy measure", main = "Determining wetland inundation threshold value", plot.it = TRUE, opt.thresholds = TRUE, vert.lines = TRUE) Presence.GLMM.Optimal.Threshold ##### ##### #####obtain ponding statistics #Predicted inundation events Predicted <- subset(Ponded, Pred.Presence.GLMM.FixedOnly >= 0.73) nrow(Ponded) nrow(Predicted) Predicted2007 <- subset(Predicted, Year == 2007) nrow(Predicted2007) Predicted2008 <- subset(Predicted, Year == 2008) nrow(Predicted2008) Predicted2009 <- subset(Predicted, Year == 2009) nrow(Predicted2009) #observed inundation events Observed <- subset(Ponded, Presence == 1) nrow(Observed) Observed2007 <- subset(Observed, Year == 2007) nrow(Observed2007) Observed2008 <- subset(Observed, Year == 2008) nrow(Observed2008) Observed2009 <- subset(Observed, Year == 2009) nrow(Observed2009) #total yearly records Year2007 <- subset(Ponded, Year == 2007) Year2008 <- subset(Ponded, Year == 2008) Year2009 <- subset(Ponded, Year == 2009) #observed proportion ponded nrow(Observed2007)/nrow(Year2007) nrow(Observed2008)/nrow(Year2008) nrow(Observed2009)/nrow(Year2009) #predicted proportion ponded nrow(Predicted2007)/nrow(Year2007) nrow(Predicted2008)/nrow(Year2008) nrow(Predicted2009)/nrow(Year2009) #predicted inundation likelihoods mean(Year2007$Pred.Presence.GLMM.FixedOnly) mean(Year2008$Pred.Presence.GLMM.FixedOnly) mean(Year2009$Pred.Presence.GLMM.FixedOnly) #### ##### #####make predictions with the best supported generalized linear mixed model #with fixed and all random effects considered Ponded$Pred.Presence.GLMM.FixedAndRandom <- predict(top.model, re.form = ~(1|Wetland), type = 'link') Ponded$Pred.Presence.GLMM.FixedAndRandom <- ilogit(Ponded$Pred.Presence.GLMM.FixedAndRandom) #test predictions with basic auc test retain <- c("Wetland", "Presence", "Pred.Presence.GLMM.FixedAndRandom") Presence.GLMM.Validation <- Ponded[retain] Presence.GLMM.AUC <- auc(Presence.GLMM.Validation, st.dev = TRUE, which.model = 1, na.rm = FALSE) Presence.GLMM.AUC #generate auc/roc plot Presence.GLMM.AUC.Plot <- auc.roc.plot(Presence.GLMM.Validation, threshold = 101, find.auc = TRUE, xlab = "False positive rate", ylab = "True positive rate", main = NULL, col = "black", opt.thresholds = FALSE, legend.text = "Inundation model") #create optimum threshold plot for predictive model Presence.GLMM.Optimal.Threshold <- error.threshold.plot(Presence.GLMM.Validation, threshold = 101, which.model = 1, na.rm = FALSE, xlab = "Threshold value", ylab = "Accuracy measure", main = "Determining wetland inundation threshold value", plot.it = TRUE, opt.thresholds = TRUE, vert.lines = TRUE) Presence.GLMM.Optimal.Threshold ##### ##### #####transform wetland type to binomial Ponded$SemiPerm <- Ponded$Wettype == 'Semi-permanent' Ponded$Seasonal <- Ponded$Wettype == 'Seasonal' Ponded$Temporary <- Ponded$Wettype == 'Temporary' for(i in 1:length(Ponded$SemiPerm)){ Ponded$SemiPerm[i] <- recode(Ponded$SemiPerm[i], "c('TRUE') = 1; ('FALSE') = 0") } for(i in 1:length(Ponded$Seasonal)){ Ponded$Seasonal[i] <- recode(Ponded$Seasonal[i], "c('TRUE') = 1; ('FALSE') = 0") } for(i in 1:length(Ponded$Temporary)){ Ponded$Temporary[i] <- recode(Ponded$Temporary[i], "c('TRUE') = 1; ('FALSE') = 0") } ##### ##### #####Make manual predictions with best-supported generalized linear mixed model #with only fixed effects included Ponded$Pred.Presence.ManualFixedOnly <- NA for(i in 1:nrow(Ponded)){ Ponded$Pred.Presence.ManualFixedOnly[[i]] = 1.96446+ + 0.71939 * Ponded$SemiPerm[[i]]+ - 0.87143 * Ponded$Temporary[[i]]+ - ((1.76324 * Ponded$log.NewfootPA[[i]] - 1.76324 * -4.20119) / 0.62257)+ + ((0.42099 * Ponded$log.lsprs[[i]] - 0.42099 * 5.38196) / 0.31275)+ - ((0.60281 * Ponded$fatmaxm[[i]] - 0.60281 * 16.08833) / 1.12164)+ + ((0.16996 * Ponded$log.wprs[[i]] - 0.16996 * 4.23183) / 0.79371)+ - ((0.92181 * Ponded$wtmaxm[[i]] - 0.92181 * 6.53900) / 1.42316) } Ponded$Pred.Presence.ManualFixedOnly <- ilogit(Ponded$Pred.Presence.ManualFixedOnly) #test predictions with basic auc test retain <- c("Wetland", "Presence", "Pred.Presence.ManualFixedOnly") Presence.GLMM.Validation <- Ponded[retain] Presence.GLMM.AUC <- auc(Presence.GLMM.Validation, st.dev = TRUE, which.model = 1, na.rm = FALSE) Presence.GLMM.AUC #generate auc/roc plot Presence.GLMM.AUC.Plot <- auc.roc.plot(Presence.GLMM.Validation, threshold = 101, find.auc = TRUE, xlab = "False positive rate", ylab = "True positive rate", main = NULL, col = "black", opt.thresholds = FALSE, legend.text = "Inundation model") #create optimum threshold plot for predictive model Presence.GLMM.Optimal.Threshold <- error.threshold.plot(Presence.GLMM.Validation, threshold = 101, which.model = 1, na.rm = FALSE, xlab = "Threshold value", ylab = "Accuracy measure", main = "Determining wetland inundation threshold value", plot.it = TRUE, opt.thresholds = TRUE, vert.lines = TRUE) Presence.GLMM.Optimal.Threshold ##### ##### #####use k-fold cross validation function from Chris Jorgensen for validation #with k = 10 source("cv.glmmFunction_v2.0.R") Presence.GLMM.CrossValidation <- cv.glmm(Ponded, top.model, k = 10, cvData = TRUE) str(Presence.GLMM.CrossValidation) Presence.GLMM.CrossValidation head(Presence.GLMM.CrossValidation$PredObsData) summary(Presence.GLMM.CrossValidation) Presence.GLMM.CrossValidation$delta Presence.GLMM.CrossValidation$CV #get auc score and produce ROC plot ID <- rep(1:nrow(Ponded)) Presence.GLMM.CrossValidation.Stats <- Presence.GLMM.CrossValidation$PredObsData Presence.GLMM.CrossValidation.Stats <- cbind(Presence.GLMM.CrossValidation.Stats, ID) Presence.GLMM.CrossValidation.Stats <- Presence.GLMM.CrossValidation.Stats[, c(3,2,1)] Presence.GLMM.CrossValidation.AUC <- auc(Presence.GLMM.CrossValidation.Stats, st.dev = TRUE, which.model = 1, na.rm = FALSE) Presence.GLMM.CrossValidation.AUC Presence.GLMM.CrossValidation.ROC <- auc.roc.plot(Presence.GLMM.CrossValidation.Stats, threshold = 101, find.auc = TRUE, xlab = "False positive rate", ylab = "True positive rate", main = NULL, col = "black", opt.thresholds = FALSE, legend.text = "Inundation model") #create optimum threshold plot for predictive model Presence.GLMM.Optimal.Threshold <- error.threshold.plot(Presence.GLMM.CrossValidation.Stats, threshold = 101, which.model = 1, na.rm = FALSE, xlab = "Threshold value", ylab = "Accuracy measure", plot.it = TRUE, opt.thresholds = TRUE, vert.lines = TRUE) Presence.GLMM.Optimal.Threshold #read in values for all wetlands roc plot AllWetlandsROC <- read.csv("Presence.GLMM.CrossValidation.ROC.Values.All.csv", header = TRUE) TruePositiveAll <- AllWetlandsROC$sensitivity FalsePositiveAll <- 1 - AllWetlandsROC$specificity #get values for non-rowcrop wetlands roc plot Presence.GLMM.CrossValidation.ROC.Values.NonRowcrop <- roc.plot.calculate(Presence.GLMM.CrossValidation.Stats, threshold = 101, which.model = 1, na.rm = FALSE) head(Presence.GLMM.CrossValidation.ROC.Values.NonRowcrop) write.csv(Presence.GLMM.CrossValidation.ROC.Values.NonRowcrop, "Presence.GLMM.CrossValidation.ROC.Values.NonRowcrop.csv") TruePositiveNonRowcrop <- Presence.GLMM.CrossValidation.ROC.Values.NonRowcrop$sensitivity FalsePositiveNonRowcrop <- 1 - Presence.GLMM.CrossValidation.ROC.Values.NonRowcrop$specificity DummyLine <- data.frame(X = c(0,1)) DummyLine$Y <- c(0,1) #construct plot plot(TruePositiveNonRowcrop ~ FalsePositiveNonRowcrop, frame.plot = TRUE, xlab = "False positive rate", ylab = "True positive rate", axes = FALSE, type = 'l', lty = 2, lwd = 3, cex.lab = 1.25) lines(DummyLine$X, DummyLine$Y, lty = 3, lwd = 1) lines(TruePositiveAll ~ FalsePositiveAll, lty = 1, lwd = 2) axis(side = 1, at = c(0.0, 0.2, 0.4, 0.6, 0.8, 1.0), tck = 0.02) axis(side = 2, at = c(0.0, 0.2, 0.4, 0.6, 0.8, 1.0), tck = 0.02) legend(0.25, 0.3, c("Non-rowcrop-embedded (AUC = 0.82)", "All (AUC = 0.79)", "Random chance (AUC = 0.50)"), cex = 0.95, lty = c(2, 1, 3), lwd = c(3, 2, 1), bty = 'n') ##### ##### #####obtain random effects estimates for individual wetlands RandomEffects <- ranef(top.model) WetlandsList <- sort(unique(Ponded$Wetland), descending = FALSE) RandomEffectsBinomialNonRowcrops <- data.frame(Wetland = WetlandsList) RandomEffectsBinomialNonRowcrops$RandomEffect <- RandomEffects$Wetland[1:nrow(RandomEffects$Wetland),] head(RandomEffectsBinomialNonRowcrops) tail(RandomEffectsBinomialNonRowcrops) #write to csv file for making predictions #write.csv(RandomEffectsBinomialNonRowcrops, "RandomEffectsBinomialNonRowcrops.csv") #merge random effects with existing dataset Ponded <- merge(Ponded, RandomEffectsBinomialNonRowcrops, by.x = "Wetland", by.y = "Wetland") ##### ##### #####Make manual predictions with best-supported generalized linear mixed model #with fixed and random effects included Ponded$Pred.Presence.ManualFixedAndRandom <- NA for(i in 1:nrow(Ponded)){ Ponded$Pred.Presence.ManualFixedAndRandom[[i]] = Ponded$RandomEffect[[i]]+ + 0.71939 * Ponded$SemiPerm[[i]]+ - 0.87143 * Ponded$Temporary[[i]]+ - ((1.76324 * Ponded$log.NewfootPA[[i]] - 1.76324 * -4.20119) / 0.62257)+ + ((0.42099 * Ponded$log.lsprs[[i]] - 0.42099 * 5.38196) / 0.31275)+ - ((0.60281 * Ponded$fatmaxm[[i]] - 0.60281 * 16.08833) / 1.12164)+ + ((0.16996 * Ponded$log.wprs[[i]] - 0.16996 * 4.23183) / 0.79371)+ - ((0.92181 * Ponded$wtmaxm[[i]] - 0.92181 * 6.53900) / 1.42316) } Ponded$Pred.Presence.ManualFixedAndRandom <- ilogit(Ponded$Pred.Presence.ManualFixedAndRandom) #test predictions with basic auc test retain <- c("Wetland", "Presence", "Pred.Presence.ManualFixedAndRandom") Presence.GLMM.Validation <- Ponded[retain] Presence.GLMM.AUC <- auc(Presence.GLMM.Validation, st.dev = TRUE, which.model = 1, na.rm = FALSE) Presence.GLMM.AUC #generate auc/roc plot Presence.GLMM.AUC.Plot <- auc.roc.plot(Presence.GLMM.Validation, threshold = 101, find.auc = TRUE, xlab = "False positive rate", ylab = "True positive rate", main = NULL, col = "black", opt.thresholds = FALSE, legend.text = "Inundation model") #create optimum threshold plot for predictive model Presence.GLMM.Optimal.Threshold <- error.threshold.plot(Presence.GLMM.Validation, threshold = 101, which.model = 1, na.rm = FALSE, xlab = "Threshold value", ylab = "Accuracy measure", main = "Determining wetland inundation threshold value", plot.it = TRUE, opt.thresholds = TRUE, vert.lines = TRUE) Presence.GLMM.Optimal.Threshold ##### ##### #####determine mean predicted and observed ponding events in testing sets Predicted <- Presence.GLMM.CrossValidation$PredObsData[1] Observed <- Presence.GLMM.CrossValidation$PredObsData[2] mean(Predicted$pred) mean(Observed$obs) #with k = 20 Presence.GLMM.CrossValidation2 <- cv.glmm(Ponded, top.model, k = 20, cvData = TRUE) str(Presence.GLMM.CrossValidation2) head(Presence.GLMM.CrossValidation2$PredObsData) summary(Presence.GLMM.CrossValidation2) Presence.GLMM.CrossValidation2$delta Presence.GLMM.CrossValidation2$CV #get auc score and produce ROC plot ID <- rep(1:nrow(Ponded)) Presence.GLMM.CrossValidation.Stats2 <- Presence.GLMM.CrossValidation2$PredObsData Presence.GLMM.CrossValidation.Stats2 <- cbind(Presence.GLMM.CrossValidation.Stats2, ID) Presence.GLMM.CrossValidation.Stats2 <- Presence.GLMM.CrossValidation.Stats2[, c(3,2,1)] Presence.GLMM.CrossValidation.AUC2 <- auc(Presence.GLMM.CrossValidation2, st.dev = TRUE, which.model = 1, na.rm = FALSE) Presence.GLMM.CrossValidation.AUC2 Presence.GLMM.CrossValidation.ROC2 <- auc.roc.plot(Presence.GLMM.CrossValidation2, threshold = 101, find.auc = TRUE, xlab = "False positive rate", ylab = "True positive rate", main = NULL, col = "black", opt.thresholds = FALSE, legend.text = "Inundation model") #with k = 100 Presence.GLMM.CrossValidation3 <- cv.glmm(Ponded, top.model, k = 100, cvData = TRUE) str(Presence.GLMM.CrossValidation3) head(Presence.GLMM.CrossValidation3$PredObsData) summary(Presence.GLMM.CrossValidation3) Presence.GLMM.CrossValidation3$delta Presence.GLMM.CrossValidation3$CV #get auc score and produce ROC plot ID <- rep(1:nrow(Ponded)) Presence.GLMM.CrossValidation.Stats3 <- Presence.GLMM.CrossValidation3$PredObsData Presence.GLMM.CrossValidation.Stats3 <- cbind(Presence.GLMM.CrossValidation.Stats3, ID) Presence.GLMM.CrossValidation.Stats3 <- Presence.GLMM.CrossValidation.Stats3[, c(3,2,1)] Presence.GLMM.CrossValidation.AUC3 <- auc(Presence.GLMM.CrossValidation3, st.dev = TRUE, which.model = 1, na.rm = FALSE) Presence.GLMM.CrossValidation.AUC3 Presence.GLMM.CrossValidation.ROC3 <- auc.roc.plot(Presence.GLMM.CrossValidation3, threshold = 101, find.auc = TRUE, xlab = "False positive rate", ylab = "True positive rate", main = NULL, col = "black", opt.thresholds = FALSE, legend.text = "Inundation model") ##### ##### #####detect and address non-linear relationships between the residuals of the #best-supported linear mixed model and predictors, in order to gauge if it is #appropriate to incorporate smoothing terms via generalized additive mixed modeling #not subset by any nominal variable xyplot(resid(top.model) ~ Ponded$scaled.log.NewfootPA, type = 'smooth') xyplot(resid(top.model) ~ Ponded$scaled.log.lsprs, type = 'smooth') xyplot(resid(top.model) ~ Ponded$scaled.fatmaxm, type = 'smooth') xyplot(resid(top.model) ~ Ponded$scaled.wtmaxm, type = 'smooth') #subset by wetland type xyplot(resid(top.model) ~ Ponded$scaled.log.NewfootPA|Ponded$Wettype, type = 'smooth') xyplot(resid(top.model) ~ Ponded$scaled.log.lsprs|Ponded$Wettype, type = 'smooth') xyplot(resid(top.model) ~ Ponded$scaled.fatmaxm|Ponded$Wettype, type = 'smooth') xyplot(resid(top.model) ~ Ponded$scaled.wtmaxm|Ponded$Wettype, type = 'smooth') #subset by year xyplot(resid(top.model) ~ Ponded$scaled.log.NewfootPA|Ponded$Year, type = 'smooth') xyplot(resid(top.model) ~ Ponded$scaled.log.lsprs|Ponded$Year, type = 'smooth') xyplot(resid(top.model) ~ Ponded$scaled.fatmaxm|Ponded$Year, type = 'smooth') xyplot(resid(top.model) ~ Ponded$scaled.wtmaxm|Ponded$Year, type = 'smooth')