########################### ## Analyses of Experimental Scale and Complexity ## for Witman et al. Towards an integration of scale and complexity in marine ecology ## ## Jarrett Byrnes & Robert Lamb ## Last Modified 11/24/2014 ########################### #load libraries for analysis and visualization library(ggplot2) library(car) library(plyr) library(lavaan) library(wesanderson) #from https://github.com/karthik/wesanderson library(reshape2) library(MASS) library(multcomp) library(contrast) library(vegan) library(BiodiversityR) library(RColorBrewer) library(quantreg) library(scales) #Read in the data comp <- read.csv("../Stats files/R.input.csv", head=T) #apply some transformations to the data for later analyses comp <- within(comp,{ log.Spatial.extent <- log(Spatial.extent+1) log.Glob.spatial <- log(Glob.spatial+1) log.Spatial.grain <- log(Spatial.grain+1) log.Duration <- log(Duration+1) log.Factors <- log(Factors+1) log.Species <- log(Species+1) log.Treatment.comb <- log(Treatment.comb+1) Species100 <- Species/100 }) #fix some issues with factors, etc. comp$System <- as.character(comp$System) comp$System <- gsub(" ", "", comp$System ) comp$System <- factor(comp$System) ################## ## SEM analysis ################## #check for extreme collinearity cor(comp[,c(5:12)]) #An SEM Approach mod <- 'ExptComplexity =~ log.Species + Treatment.comb + Factors log.Duration ~ Year log.Spatial.grain ~ Year log.Spatial.extent ~ Year + log.Spatial.grain ExptComplexity ~ Year + log.Duration + log.Spatial.extent + log.Spatial.grain Treatment.comb ~~ Factors' fit <- sem(mod, data=comp, estimator="MLM") summary(fit, rsq=T, standardized=T) sink("../figures/sem_out.txt") summary(fit, rsq=T, standardized=T) sink() semPaths(fit, what="std", layout="tree2", exoCov = FALSE) #examine residuals res <- residuals(fit, "casewise") par(mfrow=c(2,3)) for(i in 1:6){ qqnorm(res[,i], main=colnames(res)[i]) qqline(res[,i]) } par(mfrow=c(1,1)) #non-latent variable model mod2 <- 'log.Duration ~ Year log.Spatial.grain ~ Year log.Spatial.extent ~ Year + log.Spatial.grain #regressions log.Species ~ Year + log.Duration + log.Spatial.extent + log.Spatial.grain Treatment.comb ~ Year + log.Duration + log.Spatial.extent + log.Spatial.grain Factors ~ Year + log.Duration + log.Spatial.extent + log.Spatial.grain #final covarince Treatment.comb ~~ Factors' fit2 <- sem(mod2, data=comp, meanstructure=T) sink("../figures/sem_out2.txt") summary(fit2, rsq=T, standardized=T) sink() summary(fit2) #compare them to see if simplifiying via a latent variable approach is a-ok anova(fit, fit2) ################## ## SEM analysis with GLMMs ################## library(piecewiseSEM) #first, get our factor #An SEM Approach comp$Species100 <- comp$Species modComplex <- 'ExptComplexity =~ log.Species + log.Treatment.comb + log.Factors' fitComplex <- sem(modComplex, data=comp, estimator="MLM", meanstructure=T) #get factor scores ExptComplexity <- predict(fitComplex)+abs(min(predict(fitComplex)))+1 comp$ExptComplexity <- c(ExptComplexity[1:68], NA, ExptComplexity[69:310]) modList <- list( glm(Duration ~ Year, data=comp, family=Gamma(link="log")), glm(Spatial.grain+0.01 ~ Year, data=comp, family=Gamma(link="log"),control=list(maxit=10000)), glm(Spatial.extent ~ Year+Spatial.grain, data=comp, family=Gamma(link="log")), glm(ExptComplexity ~ Year + Duration + Spatial.extent + Spatial.grain, data=comp, family=Gamma(link="log")) ) get.sem.coefs(modList, data=comp, corr.errors="log.Duration ~~ log.Spatial.grain") get.fisher.c(modList, data=comp, corr.errors="log.Duration ~~ log.Spatial.grain") ############################# #BIPLOTS #GLMs - year versus all complexity variables, ggplot of all glm fits # NB or Gamma error generation ############################### #compMelt <- melt(comp, id.vars = names(comp)[1:4], measure.vars=names(comp)[c(5:8,9,10:11)]) compMelt <- melt(comp, id.vars = c("Author", "Year", "Year.block", "Journal", "System"), measure.vars=c("Duration", "Spatial.grain", "Spatial.extent", "Glob.spatial", "Species", "Factors", "Treatment.comb")) #get which regressions = different from 0 #subDF <- subset(compMelt, compMelt$variable=="Glob.spatial") manyfits <- dlply(compMelt, .(variable), function(subDF) { print(subDF$variable[1]) if(sum(round(subDF$value)==subDF$value, na.rm=T)==nrow(subDF)){ # print("NB") reg <- glm(value ~ Year, data=subDF,family=quasipoisson(link="log")) if(summary(reg)$dispersion>1){ reg <- glm.nb(value ~ Year, data=subDF, control=list(maxit=1000, trace=FALSE, epsilon=1e-8)) } }else{ # print("Gamma") #deal with an error due to some tiny experimental scales if(as.character(subDF$variable[1])=="Glob.spatial") subDF$value <- subDF$value +0.1 reg <- glm(value+1 ~ Year, data=subDF, family=Gamma(link="log"), control=list(maxit=10000)) } return(reg) }) pvals <- sapply(manyfits, function(afit) Anova(afit)[1,3]) sigVars <- as.character(unique(compMelt$variable))[which(pvals<=0.05)] r2s <- sapply(manyfits, function(afit) r2.corr(afit)) families <- sapply(manyfits, function(afit) afit$family$family) links <- sapply(manyfits, function(afit) afit$family$link) write.csv(data.frame(Variable=names(manyfits), P = pvals, r2Corr = r2s, GLM_Family=families, link=links), "../figures/modelTab.csv", row.names=F) #qplot using pvalues from above to only show significant fits biplots <- qplot(Year, value, data=compMelt, facets=~variable, alpha=I(0.4), size=I(3))+ facet_wrap(~variable, scales="free_y") + stat_smooth(method="glm.nb", fill=NA, color="red", lwd=2, data=subset(compMelt, as.character(compMelt$variable) %in% sigVars)) + #stat_smooth(method="glm", fill=NA, color="red", lwd=2, # data=subset(compMelt, as.character(compMelt$variable) %in% sigVars)) + theme_bw(base_size=16) + ylab("Value \n") + scale_x_continuous(breaks=seq(1960, 2010,10)) + theme(axis.text.x=element_text(angle=-90, vjust=0.5)) pdf("../figures/biplots.pdf") biplots dev.off() #### Significance tests using chi-square tests and likelihood ratios (model-II regression) to test for differences between means for each decade and variable##### #subDF <- subset(compMelt, compMelt$variable=="Glob.spatial") manytests <- dlply(compMelt, .(variable), function(subDF) { print(subDF$variable[1]) if(sum(round(subDF$value)==subDF$value, na.rm=T)==nrow(subDF)){ # print("NB") reg <- glm(value+1 ~ Year.block, data=subDF,family=quasipoisson(link="log")) if(summary(reg)$dispersion>1){ reg <- glm.nb(value+1 ~ Year.block, data=subDF, control=list(maxit=1000, trace=FALSE, epsilon=1e-8)) } }else{ # print("Gamma") #deal with an error due to some tiny experimental scales if(as.character(subDF$variable[1])=="Glob.spatial") subDF$value <- subDF$value +0.1 reg <- glm(value+1 ~ Year.block, data=subDF, family=Gamma(link="log"), control=list(maxit=10000)) } return(reg) }) testpvals <- sapply(manytests, function(afit) Anova(afit)[1,3]) testsigVars <- as.character(unique(compMelt$variable))[which(testpvals<=0.05)] testtable <- sapply(manytests, function(afit) Anova(afit)) testfamilies <- sapply(manytests, function(afit) afit$family$family) testlinks <- sapply(manytests, function(afit) afit$family$link) write.csv(testtable, file="yearanovamodels.csv") write.csv(data.frame(Variable=names(manytests), tests = testtable, GLM_Family=testfamilies, link=testlinks), "../figures/anovamodelTab.csv", row.names=F) ###Plot means by decade### #For color colstart = 0 colend = 1 pal <- wes.palette(name = "Zissou", type = "continuous") cols <- pal(length(unique(comp$Year.block))) #cols <- wes.palette(name = "Zissou", 7,type = "continuous") cols[cols=="#3B9AB2"]="purple" #cols= brewer.pal(6,"Spectral") testcompMelt<- melt(comp[which(comp$Treatment.comb!="NA"),], id.vars = c("Author", "Year", "Year.block", "Journal", "System"),measure.vars=c("Duration", "Spatial.grain", "Spatial.extent", "Glob.spatial", "Species", "Factors", "Treatment.comb")) testmeans.sem=ddply(testcompMelt, c("Year.block", "variable"), summarise,mean=mean(value), sem=sd(value)/sqrt(length(value))) testmeans.sem <- transform(testmeans.sem, lower=mean-sem, upper=mean+sem) yearmeans.barplot <- qplot(x=Year.block, y=mean, fill=Year.block, data=testmeans.sem, geom="bar", stat="identity", position="dodge", xlab="", ylab="Mean +/- SE", main="") + facet_wrap(~variable, scales="free_y", ncol=2, nrow=4) +theme_bw() + geom_errorbar(aes(ymax=upper, ymin=lower), position=position_dodge(0.9), data=testmeans.sem) + scale_fill_manual(values=cols) pdf(file="yearplots_color.pdf") yearmeans.barplot dev.off() #Species, factors, and spatial grain significant, lets do post-hoc contrasts spec=glm.nb(Species ~ Year.block, link=log,control=list(maxit=1000, trace=FALSE, epsilon=1e-8), data=comp) spat=glm(Spatial.grain+1 ~ Year.block, family=Gamma(link="log"), control=list(maxit=10000), data=comp) fac=glm(Factors+1 ~ Year.block, data=comp,family=quasipoisson(link="log")) treat=glm(Treatment.comb+1 ~ Year.block, data=comp,family=Gamma(link="log")) species=summary(glht(spec, mcp(Year.block="Tukey"), test = adjusted("fdr"))) spatial=summary(glht(spat, mcp(Year.block="Tukey"), test = adjusted("fdr"))) factors=summary(glht(fac, mcp(Year.block="Tukey"), test = adjusted("fdr"))) treats=summary(glht(treat, mcp(Year.block="Tukey"), test = adjusted("fdr"))) speciescon=confint(glht(spec, mcp(Year.block="Tukey"), test = adjusted("fdr"))) speciesco=speciescon$`confint` speciesp=species$`test`$pvalues tukspecies=cbind(speciesco, speciesp) spatialcon=confint(glht(spat, mcp(Year.block="Tukey"))) spatialco=spatialcon$`confint` spatialp=spatial$`test`$pvalues tukspatial=cbind(spatialco, spatialp) factorscon=confint(glht(fac, mcp(Year.block="Tukey"))) factorsco=speciescon$`confint` factorsp=factors$`test`$pvalues tukfactors=cbind(factorsco, factorsp) yeartuks=rbind(tukspecies, tukfactors, tukspatial) write.csv(yeartuks, file="yeartukey.csv") ########### 3D Visualizations##### #let's try persp #For color colstart = 0 colend = 1 pal <- wes.palette(name = "Zissou", type = "continuous") cols <- pal(length(unique(comp$Year.block))) #cols <- wes.palette(name = "Zissou", 6,type = "continuous") cols[cols=="#3B9AB2"]="purple" #cols= brewer.pal(6,"Spectral") grdVals <- function(x, bound=0.01) seq(range(x)[1]-bound, range(x)[2]+bound, length.out=50) makeScatterBox <- function(x,y,z, bound=0, ...){ x <- grdVals(x, bound=bound) y <- grdVals(y, bound=bound) z <- grdVals(z, bound=bound) z <- matrix(rep(z, 50), nrow=50) p <- persp(x,y,z, col=NA, border=NA, ...) p } scatterPlot3d <- function(x,y,z, pch=19, cex=1, col=cols[unclass(comp$Year.block)], bound=1, ...){ #eliminate any NAs idx <- unique(c(which(is.na(x)), which(is.na(y)), which(is.na(z)))) if(length(idx)>0){ x <- x[-idx] y <- y[-idx] z <- z[-idx] } p <- makeScatterBox(x,y,z, ...) points(trans3d(x,y, z, p), pch=pch, col=col, cex=cex) } pdf("../figures/2a_3dplot.pdf") scatterPlot3d(comp$Year, comp$Treatment.comb, comp$log.Spatial.extent, xlab="Year", ylab="# Treatment Combinations", zlab="Log Spatial Extent (m)", ticktype="detailed", theta=-390, phi=15) dev.off() pdf("../figures/2b_3dplot.pdf") scatterPlot3d(comp$Year, comp$log.Species, comp$log.Spatial.extent, xlab="Year", ylab="Log # of Species", zlab="Log Spatial Extent (m)", ticktype="detailed", theta=-390, phi=15) dev.off() #pdf("../figures/3_3dplot2.pdf") #scatterPlot3d(comp$log.Species, comp$log.Treatment.comb, comp$log.Spatial.extent, # xlab="Log # Species", ylab="Log # Treatment Combinations", zlab="Log Spatial Extent (m)", # ticktype="detailed", theta=300, phi=10) #dev.off() pdf("../figures/4_3dplot.pdf") scatterPlot3d(comp$log.Species[which(comp$log.Glob.spatial!="NA")], comp$log.Glob.spatial[which(comp$log.Glob.spatial!="NA")], comp$log.Spatial.grain[which(comp$log.Glob.spatial!="NA")], xlab="Log # Species", ylab="Log Global Spatial Extent", zlab="Log Spatial Grain (m)", ticktype="detailed", theta=225, phi=15) dev.off() ###Comparisons between different habitat types### #Remove studies across habitats dat=comp[which(comp$System!="CRRS"),] dats=dat[which(dat$System!="RSRI"),] dats$System=factor(dats$System) #Add an interaction variable for post-hoc tests dats$int=with(dats, interaction (System, Journal, sep="x")) #Creating a dummy matrix for false discovery rate post-hoc tests of interaction terms #K1 <- glht(dur, mcp(System = "Tukey"))$linfct #K2 <- glht(dur, mcp(Journal = "Tukey"))$linfct #summary(glht(dur, linfct = rbind(K1, K2))) tmp <- expand.grid(System = unique(dats$System), Journal = unique(dats$Journal)) X <- model.matrix(~ System * Journal, data = tmp) #X2 <-model.matrix(~ Journal * System, data = tmp) #glht(dur, linfct = X) #predict(dur, newdata=tmp) Tukey <- contrMat(table(dats$System), "Tukey") K1 <- cbind(Tukey, matrix(0, nrow = nrow(Tukey), ncol = ncol(Tukey))) rownames(K1) <- paste(levels(dats$Journal)[1], rownames(K1), sep = ":") K2 <- cbind(matrix(0, nrow = nrow(Tukey), ncol = ncol(Tukey)), Tukey) rownames(K2) <- paste(levels(dats$Journal)[2], rownames(K2), sep = ":") K <- rbind(K1, K2) colnames(K) <- c(colnames(Tukey), colnames(Tukey)) #summary(glht(dur, linfct = K %*% X)) #Duration dur=glm(Duration+1 ~ System*Journal, family=Gamma(link="log"), control=list(maxit=10000), data=dats) summary(dur) adur=Anova(dur, type="II", test.statistics="LR") sumdur1=summary(glht(dur, mcp(System="Tukey"), test = adjusted("fdr"))) sumdur2=summary(glht(dur, mcp(Journal="Tukey"), test = adjusted("fdr"))) sumdur3=summary(glht(dur, linfct = K %*% X), test = adjusted("fdr")) durcon=confint(glht(dur, mcp(System="Tukey"))) durco=durcon$`confint` durp=sumdur1$`test`$pvalues tukdur=cbind(durco, durp) #dur.int=glm(Duration+1 ~ int, family=Gamma(link="log"), control=list(maxit=10000), data=dats) #summary(glht(dur.int, mcp(int="Tukey"))) #anova(dur, test="F") #TukeyHSD(aov(dur), ordered=T) #Factors fac=glm(Factors ~ System*Journal, family=quasipoisson (link="log"), data=dats) summary(fac) afac=Anova(fac, type="II", test.statistics="LR") sumtreat1=summary(glht(fac, mcp(System="Tukey"), test = adjusted("fdr"))) sumtreat2=summary(glht(fac, mcp(Journal="Tukey"), test = adjusted("fdr"))) sumtreat3=summary(glht(fac, linfct = K %*% X), test = adjusted("fdr")) #Treatment combinations treat=glm(Treatment.comb ~ System*Journal, family=Gamma(link="log"), control=list(maxit=10000), data=dat[which(dats$Treatment.comb!="NA"),]) summary(treat) atreat=Anova(treat, type="II", test.statistics="LR") sumtreat1=summary(glht(treat, mcp(System="Tukey"), test = adjusted("fdr"))) sumtreat2=summary(glht(treat, mcp(Journal="Tukey"), test = adjusted("fdr"))) sumtreat3=summary(glht(treat, linfct = K %*% X), test = adjusted("fdr")) #treat.int=glm(Treatment.comb+1 ~ int, family=Gamma(link="log"), control=list(maxit=10000), data=dats) #summary(glht(treat.int, mcp(int="Tukey"))) #Species spec=glm.nb(Species ~ System*Journal, link=log,control=list(maxit=1000, trace=FALSE, epsilon=1e-8), data=dats) summary(spec) aspec=Anova(spec, type="II", test.statistics="LR") sumspec1=summary(glht(spec, mcp(System="Tukey"), test = adjusted("fdr"))) sumspec2=summary(glht(spec, mcp(Journal="Tukey"), test = adjusted("fdr"))) sumspec3=summary(glht(spec, linfct = K %*% X), test = adjusted("fdr")) spccon=confint(glht(spec, linfct = K %*% X)) spcco=spccon$`confint` spcp=sumspec3$`test`$pvalues tukspc=cbind(spcco, spcp) spccon2=confint(glht(spec, mcp(System="Tukey"), test = adjusted("fdr"))) spcco2=spccon2$`confint` spcp2=sumspec1$`test`$pvalues tukspc2=cbind(spcco2, spcp2) #spec.int=glm.nb(Species+1 ~ int,link="log", data=dats) #summary(glht(spec.int, mcp(int="Tukey"))) #Spatial grain spat=glm(Spatial.grain+1 ~ System*Journal, family=Gamma(link="log"), control=list(maxit=10000), data=dats) summary(spat) aspat=Anova(spat, type="II", test.statistics="LR") sumspat1=summary(glht(spat, mcp(System="Tukey"), test = adjusted("fdr"))) sumspat2=summary(glht(spat, mcp(Journal="Tukey"), test = adjusted("fdr"))) sumspat3=summary(glht(spat, linfct = K %*% X), test = adjusted("fdr")) #spat.int=glm(Spatial.grain+1 ~ int, family=Gamma(link="log"), control=list(maxit=10000), data=dats) #summary(glht(spat.int, mcp(int="Tukey"))) #Global spatial scale gspat=glm(Glob.spatial+1 ~ System*Journal, data=dats, family=Gamma(link="log"), control=list(maxit=10000)) summary(gspat) #agspat=Anova(gspat, type="II", test.statistics="LR") #Gamma hurdle approach to global spatial dats$non_zeroGS <- ifelse(dats$Glob.spatial > 0, 1, 0) m1 <- glm(non_zeroGS ~ System*Journal, data = dats, family = binomial(link = logit)) m2 <- glm(Glob.spatial ~ System*Journal, data = subset(dats, non_zeroGS == 1), family = Gamma(link = log), control=list(maxit=10000)) Anova(m1) summary(update(m1, .~.-1)) agspat=Anova(m2) summary(update(m2, .~.-1)) summary(m2) sumzero1=summary(glht(m1, mcp(System="Tukey"), test = adjusted("fdr"))) sumzero2=summary(glht(m1, mcp(Journal="Tukey"), test = adjusted("fdr"))) sumzero3=summary(glht(m1, linfct = K %*% X), test = adjusted("fdr")) sumgspat1=summary(glht(gspat, mcp(System="Tukey"), test = adjusted("fdr"))) sumgspat2=summary(glht(gspat, mcp(Journal="Tukey"), test = adjusted("fdr"))) sumgspat3=summary(glht(gspat, linfct = K %*% X), test = adjusted("fdr")) gscon=confint(glht(gspat, linfct = K %*% X)) gsco=gscon$`confint` gsp=sumgspat3$`test`$pvalues tukgs=cbind(gsco, gsp) #Spatial extent sext=glm(Spatial.extent+1 ~ System*Journal, family=Gamma(link="log"), control=list(maxit=10000), data=dats) summary(sext) asext=Anova(sext, type="II", test.statistics="LR") sumsext1=summary(glht(sext, mcp(System="Tukey"), test = adjusted("fdr"))) sumsext2=summary(glht(sext, mcp(Journal="Tukey"), test = adjusted("fdr"))) sumsext3=summary(glht(sext, linfct = K %*% X), test = adjusted("fdr")) sxcon=confint(glht(sext, linfct = K %*% X)) sxco=sxcon$`confint` sxp=sumsext3$`test`$pvalues tuksx=cbind(sxco, sxp) #write.csv(sumsext3, file="spatial_extent.csv") #sext.int=glm(Spatial.extent+1 ~ int, family=Gamma(link="log"), data=dats) #summary(glht(sext.int, mcp(int="Tukey"))) #Chi-square table results write.csv(print(rbind(adur, aspat, asext, agspat, aspec, atreat)), file="anovas.csv") sumdur1$`test`$pvalues sumdur1$`pvalues` #False discovery rate post-hoc comparisons for relevant contrasts rbind(tukdur, tuksx, tukgs, tukspc2, tukspc ) write.csv(rbind(tukdur, tuksx, tukgs, tukspc2, tukspc), file="finaltukey.csv") ###Mean and SD values for manuscript### spat=tapply(dats$Spatial.extent, list(Journal=dats$Journal, System=dats$System), mean, na.rm=TRUE) glob=tapply(dats$Glob.spatial, list(Journal=dats$Journal, System=dats$System), mean, na.rm=TRUE) grain=tapply(dats$Spatial.grain, list(Journal=dats$Journal, System=dats$System), mean, na.rm=TRUE) dur=tapply(dats$Duration, list(Journal=dats$Journal, System=dats$System), mean, na.rm=TRUE) sp=tapply(dats$Species, list(Journal=dats$Journal, System=dats$System), mean, na.rm=TRUE) treat=tapply(dats$Treatment.comb, list(Journal=dats$Journal, System=dats$System), mean, na.rm=TRUE) fac=tapply(dats$Factors, list(Journal=dats$Journal, System=dats$System), mean, na.rm=TRUE) allmeans=print(rbind(spat, glob, grain, dur, sp, treat, fac)) spat2=tapply(dats$Spatial.extent, list(Journal=dats$Journal, System=dats$System), sd, na.rm=TRUE) glob2=tapply(dats$Glob.spatial, list(Journal=dats$Journal, System=dats$System), sd, na.rm=TRUE) grain2=tapply(dats$Spatial.grain, list(Journal=dats$Journal, System=dats$System), sd, na.rm=TRUE) dur2=tapply(dats$Duration, list(Journal=dats$Journal, System=dats$System), sd, na.rm=TRUE) sp2=tapply(dats$Species, list(Journal=dats$Journal, System=dats$System), sd, na.rm=TRUE) treat2=tapply(dats$Treatment.comb, list(Journal=dats$Journal, System=dats$System), sd, na.rm=TRUE) fac2=tapply(dats$Factors, list(Journal=dats$Journal, System=dats$System), sd, na.rm=TRUE) allsds=print(rbind(spat2, glob2, grain2, dur2, sp2, treat2, fac2)) #Mean and standard error values by year block for manuscript table ymeans <- ddply(comp, .(Year.block), summarise, Glob.spatial = mean(na.omit(Glob.spatial)), Spatial.extent = mean(Spatial.extent), Spatial.grain = mean(Spatial.grain), Duration = mean(Duration), Species = mean(Species), Factors= mean(Factors), Treatment.comb = mean(na.omit(Treatment.comb)), N = length(Year.block) ) serrs <- ddply(dat, .(Year.block), summarise, Glob.spatial = sd(na.omit(Glob.spatial))/sqrt(length(na.omit(Glob.spatial))), Spatial.extent = sd(Spatial.extent)/sqrt(length(Spatial.extent)), Spatial.grain = sd(Spatial.grain)/sqrt(length(Spatial.grain)), Duration = sd(Duration)/sqrt(length(Duration)), Species = sd(Species)/sqrt(length(Species)), Factors= sd(Factors)/sqrt(length(Factors)), Treatment.comb = sd(na.omit(Treatment.comb))/sqrt(length(na.omit(Treatment.comb))) ) write.csv(ymeans, file="meanvals.csv") write.csv(serrs, file="serrvals.csv") #Site replication for manuscript table table1=ddply(comp, .(Year.block), summarise, Replication = length(Year.block[which(Glob.spatial>0)]), N = length(Year.block), Percent= Replication/N ) write.csv(table1, file="replication.csv") #Plot mean and SE values for each variable against habitat type and journal #condense dataset for ease of plotting #Make two plots, one for each journal massmelt=melt(dats, id.vars=c("Year.block","Journal", "System"), measure.vars=c("log.Treatment.comb", "log.Species", "log.Duration", "log.Spatial.grain", "log.Glob.spatial", "log.Spatial.extent")) #Make a plot for all journals combined massmelt=melt(dats, id.vars=c("Year.block", "System"), measure.vars=c("log.Treatment.comb", "log.Species", "log.Duration", "log.Spatial.grain", "log.Glob.spatial", "log.Spatial.extent")) #Plot for true values massmelt=melt(dats, id.vars=c("Year.block", "System", "Journal"), measure.vars=c("Duration", "Spatial.grain", "Spatial.extent", "Glob.spatial", "Species", "Factors", "Treatment.comb")) head(massmelt) #Remove problematic entries for cross-system comparisons and one study where treamtent combinations were not available massmelt=massmelt[which(massmelt$value!="NA"),] means.sem=ddply(massmelt, c("System", "variable", "Journal"), summarise,mean=mean(value), sem=sd(value)/sqrt(length(value))) means.sem <- transform(means.sem, lower=mean-sem, upper=mean+sem) bmeans.barplot <- qplot(x=Journal, y=mean, colour=variable, fill=System, data=means.sem, geom="bar", stat="identity", position="dodge", xlab="", ylab="Mean +/- SE", main="") + facet_wrap(~variable, scales="free_y", ncol=2, nrow=4) + scale_fill_grey(start = 0, end = .95) +theme_bw()+ geom_errorbar(aes(ymax=upper, ymin=lower), position=position_dodge(0.9), data=means.sem)+scale_color_grey(end=0) pdf(file="Figure 6.pdf") bmeans.barplot dev.off() levels(compMelt$variable) <- gsub("Spatial.grain", "Spatial Grain", levels(compMelt$variable)) levels(compMelt$variable) <- gsub("Glob.spatial", "Global Spatial Extent", levels(compMelt$variable)) levels(compMelt$variable) <- gsub("comb", "Combinations", levels(compMelt$variable)) levels(compMelt$variable) <- gsub("\\.", " ", levels(compMelt$variable)) ######## A function to generate predicted values from quantile # regressions at different levels of Tau quantRegLines.values <- function(rq_obj, lincol="red", logTrans=T, se="rank", lwd=1, lty=1, ns=T, lwd.ns = 0.5, lty.ns = 2, alpha=0.05, ...){ #get the taus taus <- rq_obj$tau lwd=rep(lwd, length(rq_obj$tau)) lty=rep(lty, length(rq_obj$tau)) #get the p-values if(se != "rank"){ p <- sapply(summary(rq_obj, se=se), function(adf) coef(adf)[2,4]) }else{ p <- as.numeric(sapply(summary(rq_obj, se="rank"), function(adf) 0 == sign(coef(adf)[2,2])+sign(coef(adf)[2,3]) )) } if(ns){ nsVals <- which(p>0.05) lwd[nsVals] <- lwd.ns lty[nsVals] <- lty.ns } #get x x <- rq_obj$x[,2] #assumes no intercept xx <- seq(min(x, na.rm=T),max(x, na.rm=T),1) #calculate y over all taus f <- coef(rq_obj) yy <- cbind(1,xx)%*%f if(logTrans) yy <- exp(yy)-1 if(length(lincol)==1) lincol=rep(lincol, length(taus)) yy <- as.data.frame(yy) yy$x <- xx yyMelt <- melt(yy, "x", variable.name = "tau") yyMelt$lwd <- as.vector(sapply(lwd, function(l) rep(l, length(xx)))) yyMelt$lty <- as.vector(sapply(lty, function(l) rep(l, length(xx)))) yyMelt$p <- as.vector(sapply(p, function(l) rep(l, length(xx)))) yyMelt$tau <- gsub("tau\\=", "", yyMelt$tau) #geom_line(data=yyMelt, mapping=aes(x=x, y=value), lwd=yyMelt$lwd, lty=yyMelt$lty) #plot all lines # for(i in 1:length(taus)){ # lines(xx,yy[,i], col=lincol[i], lwd=lwd[i], lty=lty[i], ...) # } yyMelt } ########### #GGPLOT2 the biplots with quantile fits taus <- seq(0.05, 0.95, .1) rq_lines <- dlply(compMelt, .(variable), function(subDF) { print(subDF$variable[1]) subDF <- na.omit(subDF) rq_fit <- rq(log(value+1)~(Year),data=subDF, tau=taus) # quantRegLines(rq_fit, lincol=rainbow(length(taus))) rq_fit }) names(rq_lines) <- levels(compMelt) rq_curves <- ldply(rq_lines, quantRegLines.values, se="ker") rq_curves$ns <- ifelse(rq_curves$p>0.05,"No","Yes") rq_biplots <- ggplot(data=compMelt, mapping=aes(x=Year, y=value)) + geom_point(alpha=0.5) + facet_wrap(~variable, scale="free_y") + geom_line(data=rq_curves, mapping=aes(x=x, y=value, color=tau, lty=ns, lwd=ns)) + scale_color_discrete(name = "Quantile (tau)") + scale_size_manual(name = "Different from 0", values=c(.5,1)) + scale_linetype_manual(name = "Different from 0", values=c(2,1)) + theme_bw(base_size=17, base_family="Helvetica") pdf("../figures/quantreg_biplots.pdf", width=12, height=8) rq_biplots dev.off() #A Function to extract confidence bands getBand <- function(rq_obj, se="ker"){ out <- lapply(summary(rq_obj, se=se), function(adf) coef(adf)[2,]) names(out) <- rq_obj$tau out <- ldply(out) names(out)[1] <- "tau" out } #write out the coefficients and t-tests for these values of tau tab <- ldply(rq_lines, getBand) write.csv(tab, "../figures/quanreg_table.csv") ########Coefficients and CIs across a wide range of tau values #subDF <- subset(compMelt, compMelt$variable=="Glob.spatial") #debug #many taus taus2 <- seq(0.05, 0.95, length.out=25) #make some fit rq objects rq_lines2 <- dlply(compMelt, .(variable), function(subDF) { print(subDF$variable[1]) subDF <- na.omit(subDF) rq_fit <- rq(log(value+1)~(Year),data=subDF, tau=taus2) rq_fit }) ######### #ggplot coefs and CIs across a wide range of tau values ######### tab2 <- ldply(rq_lines2, function(adf) getBand(adf, se="rank")) names(tab2) <- gsub(" ", "\\.", names(tab2)) tab2$tau <- as.numeric(tab2$tau) #need to take it back to a numeric #get rid of anomolous bad CI tab2$lower.bd[which(tab2$lower.bd < -1e300)] <- -Inf coefPlot <- ggplot(data=tab2, mapping=aes(x=tau, y=coefficients, ymin=lower.bd, ymax=upper.bd)) + facet_wrap(~variable, scale="free_y") + geom_ribbon(fill="grey", color=NA) + geom_point() + theme_bw(base_size=17, base_family="Helvetica")+ geom_abline(slop=0, intercept=0, lty=2) + ylab("Coefficient") + xlab("Quantile (tau)") + scale_x_continuous(breaks=seq(0.1,1,length.out=10)) pdf("../figures/quantreg_coefplots.pdf", width=12, height=8) coefPlot dev.off()