###########################
## Analyses of Experimental Scale and Complexity
## for Witman et al. Towards an integration of scale and complexity in marine ecology
##
## Jarrett Byrnes & Robert Lamb
## Last Modified 11/24/2014
###########################

#load libraries for analysis and visualization
library(ggplot2)
library(car)
library(plyr)
library(lavaan)
library(wesanderson) #from https://github.com/karthik/wesanderson
library(reshape2)
library(MASS)
library(multcomp)
library(contrast)
library(vegan)
library(BiodiversityR)
library(RColorBrewer)
library(quantreg)
library(scales)

#Read in the data
comp <- read.csv("../Stats files/R.input.csv",  head=T)


#apply some transformations to the data for later analyses
comp <- within(comp,{
  log.Spatial.extent <- log(Spatial.extent+1)
  log.Glob.spatial <- log(Glob.spatial+1)
  log.Spatial.grain <- log(Spatial.grain+1)
  log.Duration <- log(Duration+1)
  log.Factors <- log(Factors+1)
  log.Species <- log(Species+1)
  
  log.Treatment.comb <- log(Treatment.comb+1)
  
  
  Species100 <- Species/100
  
  
})

#fix some issues with factors, etc.
comp$System <- as.character(comp$System)
comp$System  <- gsub(" ", "", comp$System )
comp$System <- factor(comp$System)

##################
## SEM analysis
##################

#check for extreme collinearity
cor(comp[,c(5:12)])


#An SEM Approach
mod <- 'ExptComplexity =~  log.Species + Treatment.comb + Factors
log.Duration ~ Year
log.Spatial.grain ~ Year 
log.Spatial.extent ~ Year + log.Spatial.grain

ExptComplexity ~ Year + log.Duration + log.Spatial.extent + log.Spatial.grain
Treatment.comb ~~ Factors'

fit <- sem(mod, data=comp, estimator="MLM")

summary(fit, rsq=T, standardized=T)
sink("../figures/sem_out.txt")
summary(fit, rsq=T, standardized=T)
sink()
semPaths(fit, what="std", layout="tree2", exoCov = FALSE)

#examine residuals
res <- residuals(fit, "casewise")
par(mfrow=c(2,3))
for(i in 1:6){
  qqnorm(res[,i], main=colnames(res)[i])
  qqline(res[,i])
}
par(mfrow=c(1,1))

#non-latent variable model
mod2 <- 'log.Duration ~ Year
log.Spatial.grain ~ Year 
log.Spatial.extent ~ Year + log.Spatial.grain

#regressions
log.Species ~ Year + log.Duration + log.Spatial.extent + log.Spatial.grain
Treatment.comb ~ Year + log.Duration + log.Spatial.extent + log.Spatial.grain
Factors ~ Year + log.Duration + log.Spatial.extent + log.Spatial.grain

#final covarince
Treatment.comb ~~ Factors'

fit2 <- sem(mod2, data=comp, meanstructure=T)
sink("../figures/sem_out2.txt")

summary(fit2, rsq=T, standardized=T)
sink()

summary(fit2)

#compare them to see if simplifiying via a latent variable approach is a-ok
anova(fit, fit2)

##################
## SEM analysis with GLMMs
##################
library(piecewiseSEM)

#first, get our factor


#An SEM Approach
comp$Species100 <- comp$Species
modComplex <- 'ExptComplexity =~ log.Species + log.Treatment.comb + log.Factors'
fitComplex <- sem(modComplex, data=comp, estimator="MLM", meanstructure=T)

#get factor scores
ExptComplexity <- predict(fitComplex)+abs(min(predict(fitComplex)))+1
comp$ExptComplexity <- c(ExptComplexity[1:68], NA, ExptComplexity[69:310])
modList <- list(
  glm(Duration ~ Year, data=comp, family=Gamma(link="log")),
  glm(Spatial.grain+0.01 ~ Year, data=comp, family=Gamma(link="log"),control=list(maxit=10000)),
  glm(Spatial.extent ~ Year+Spatial.grain, data=comp, family=Gamma(link="log")),
  glm(ExptComplexity ~ Year + Duration +  Spatial.extent +  Spatial.grain, data=comp, family=Gamma(link="log"))
)

get.sem.coefs(modList, data=comp, corr.errors="log.Duration ~~ log.Spatial.grain")
get.fisher.c(modList, data=comp, corr.errors="log.Duration ~~ log.Spatial.grain")


#############################
#BIPLOTS
#GLMs - year versus all complexity variables, ggplot of all glm fits
# NB or Gamma error generation
###############################

#compMelt <- melt(comp, id.vars = names(comp)[1:4], measure.vars=names(comp)[c(5:8,9,10:11)])
compMelt <- melt(comp, id.vars = c("Author", "Year", "Year.block", "Journal", "System"), 
                 measure.vars=c("Duration", "Spatial.grain", "Spatial.extent", "Glob.spatial", "Species", "Factors", "Treatment.comb"))


#get which regressions = different from 0
#subDF <- subset(compMelt, compMelt$variable=="Glob.spatial")
manyfits <- dlply(compMelt, .(variable), function(subDF) {
  print(subDF$variable[1])
  if(sum(round(subDF$value)==subDF$value, na.rm=T)==nrow(subDF)){
    #  print("NB")
    reg <- glm(value ~ Year, data=subDF,family=quasipoisson(link="log"))
    if(summary(reg)$dispersion>1){
      reg <- glm.nb(value ~ Year, data=subDF,
                    control=list(maxit=1000, trace=FALSE, epsilon=1e-8))
    }
  }else{
    #  print("Gamma")
    #deal with an error due to some tiny experimental scales
    if(as.character(subDF$variable[1])=="Glob.spatial") subDF$value <- subDF$value +0.1
    
    reg <- glm(value+1 ~ Year, data=subDF, family=Gamma(link="log"), 
               control=list(maxit=10000))
    
  }
  
  return(reg)
})

pvals <- sapply(manyfits, function(afit) Anova(afit)[1,3])
sigVars <- as.character(unique(compMelt$variable))[which(pvals<=0.05)]
r2s <- sapply(manyfits, function(afit) r2.corr(afit))
families <- sapply(manyfits, function(afit) afit$family$family)
links <- sapply(manyfits, function(afit) afit$family$link)

write.csv(data.frame(Variable=names(manyfits), P = pvals, r2Corr = r2s,
                     GLM_Family=families, link=links),
          "../figures/modelTab.csv", row.names=F)


#qplot using pvalues from above to only show significant fits
biplots <- qplot(Year, value, data=compMelt, facets=~variable, alpha=I(0.4), size=I(3))+
  facet_wrap(~variable, scales="free_y") +
  stat_smooth(method="glm.nb", fill=NA, color="red", lwd=2,
              data=subset(compMelt, as.character(compMelt$variable) %in% sigVars)) +
  #stat_smooth(method="glm", fill=NA, color="red", lwd=2,
  #            data=subset(compMelt, as.character(compMelt$variable) %in% sigVars)) +
  theme_bw(base_size=16) + ylab("Value \n") +
  scale_x_continuous(breaks=seq(1960, 2010,10)) +
  theme(axis.text.x=element_text(angle=-90, vjust=0.5))

pdf("../figures/biplots.pdf")
biplots
dev.off()


#### Significance tests using chi-square tests and likelihood ratios (model-II regression) to test for differences between means for each decade and variable#####

#subDF <- subset(compMelt, compMelt$variable=="Glob.spatial")
manytests <- dlply(compMelt, .(variable), function(subDF) {
  print(subDF$variable[1])
  if(sum(round(subDF$value)==subDF$value, na.rm=T)==nrow(subDF)){
    #  print("NB")
    reg <- glm(value+1 ~ Year.block, data=subDF,family=quasipoisson(link="log"))
    if(summary(reg)$dispersion>1){
      reg <- glm.nb(value+1 ~ Year.block, data=subDF,
                    control=list(maxit=1000, trace=FALSE, epsilon=1e-8))
    }
  }else{
    #  print("Gamma")
    #deal with an error due to some tiny experimental scales
    if(as.character(subDF$variable[1])=="Glob.spatial") subDF$value <- subDF$value +0.1
    
    reg <- glm(value+1 ~ Year.block, data=subDF, family=Gamma(link="log"), 
               control=list(maxit=10000))
    
  }
  
  return(reg)
})

testpvals <- sapply(manytests, function(afit) Anova(afit)[1,3])
testsigVars <- as.character(unique(compMelt$variable))[which(testpvals<=0.05)]
testtable <- sapply(manytests, function(afit) Anova(afit))
testfamilies <- sapply(manytests, function(afit) afit$family$family)
testlinks <- sapply(manytests, function(afit) afit$family$link)

write.csv(testtable, file="yearanovamodels.csv")

write.csv(data.frame(Variable=names(manytests), tests = testtable,
                     GLM_Family=testfamilies, link=testlinks),
          "../figures/anovamodelTab.csv", row.names=F)

###Plot means by decade###

#For color
colstart = 0
colend = 1
pal <- wes.palette(name = "Zissou", type = "continuous")
cols <- pal(length(unique(comp$Year.block)))
#cols <- wes.palette(name = "Zissou", 7,type = "continuous")
cols[cols=="#3B9AB2"]="purple"
#cols= brewer.pal(6,"Spectral")

testcompMelt<- melt(comp[which(comp$Treatment.comb!="NA"),], id.vars = c("Author", "Year", "Year.block", "Journal", "System"),measure.vars=c("Duration", "Spatial.grain", "Spatial.extent", "Glob.spatial", "Species", "Factors", "Treatment.comb"))


testmeans.sem=ddply(testcompMelt, c("Year.block", "variable"), summarise,mean=mean(value), sem=sd(value)/sqrt(length(value)))
testmeans.sem <- transform(testmeans.sem, lower=mean-sem, upper=mean+sem)

yearmeans.barplot <- qplot(x=Year.block, y=mean, fill=Year.block, data=testmeans.sem, geom="bar",  stat="identity", position="dodge", xlab="", ylab="Mean +/- SE", main="") + facet_wrap(~variable, scales="free_y", ncol=2, nrow=4)  +theme_bw() + geom_errorbar(aes(ymax=upper, ymin=lower), position=position_dodge(0.9), data=testmeans.sem) +   scale_fill_manual(values=cols)

pdf(file="yearplots_color.pdf")
yearmeans.barplot
dev.off()
#Species, factors, and spatial grain significant, lets do post-hoc contrasts

spec=glm.nb(Species ~ Year.block, link=log,control=list(maxit=1000, trace=FALSE, epsilon=1e-8), data=comp)

spat=glm(Spatial.grain+1 ~ Year.block, family=Gamma(link="log"), control=list(maxit=10000), data=comp)

fac=glm(Factors+1 ~ Year.block, data=comp,family=quasipoisson(link="log"))

treat=glm(Treatment.comb+1 ~ Year.block, data=comp,family=Gamma(link="log"))

species=summary(glht(spec, mcp(Year.block="Tukey"), test = adjusted("fdr")))
spatial=summary(glht(spat, mcp(Year.block="Tukey"), test = adjusted("fdr")))
factors=summary(glht(fac, mcp(Year.block="Tukey"), test = adjusted("fdr")))

treats=summary(glht(treat, mcp(Year.block="Tukey"), test = adjusted("fdr")))


speciescon=confint(glht(spec, mcp(Year.block="Tukey"), test = adjusted("fdr")))
speciesco=speciescon$`confint`
speciesp=species$`test`$pvalues
tukspecies=cbind(speciesco, speciesp)

spatialcon=confint(glht(spat, mcp(Year.block="Tukey")))
spatialco=spatialcon$`confint`
spatialp=spatial$`test`$pvalues
tukspatial=cbind(spatialco, spatialp)

factorscon=confint(glht(fac, mcp(Year.block="Tukey")))
factorsco=speciescon$`confint`
factorsp=factors$`test`$pvalues
tukfactors=cbind(factorsco, factorsp)

yeartuks=rbind(tukspecies, tukfactors, tukspatial)

write.csv(yeartuks, file="yeartukey.csv")


########### 3D Visualizations#####
#let's try persp
#For color
colstart = 0
colend = 1
pal <- wes.palette(name = "Zissou", type = "continuous")
cols <- pal(length(unique(comp$Year.block)))
#cols <- wes.palette(name = "Zissou", 6,type = "continuous")
cols[cols=="#3B9AB2"]="purple"
#cols= brewer.pal(6,"Spectral")
grdVals <- function(x, bound=0.01) seq(range(x)[1]-bound, range(x)[2]+bound, length.out=50)

makeScatterBox <- function(x,y,z, bound=0, ...){
  x <- grdVals(x, bound=bound)
  y <- grdVals(y, bound=bound)
  z <- grdVals(z, bound=bound)
  z <- matrix(rep(z, 50), nrow=50)
  p <- persp(x,y,z, col=NA, border=NA, ...)
  
  p
  
}

scatterPlot3d <- function(x,y,z, pch=19, cex=1, col=cols[unclass(comp$Year.block)], bound=1, ...){
  #eliminate any NAs
  idx <- unique(c(which(is.na(x)), which(is.na(y)), which(is.na(z))))
  if(length(idx)>0){
    x <- x[-idx]
    y <- y[-idx]
    z <- z[-idx]
  }
  p <- makeScatterBox(x,y,z, ...)
  points(trans3d(x,y, z, p), pch=pch, col=col, cex=cex)
  
  
}

pdf("../figures/2a_3dplot.pdf")
scatterPlot3d(comp$Year, comp$Treatment.comb, comp$log.Spatial.extent,
              xlab="Year", ylab="# Treatment Combinations", zlab="Log Spatial Extent (m)",
              ticktype="detailed", theta=-390, phi=15)
dev.off()

pdf("../figures/2b_3dplot.pdf")
scatterPlot3d(comp$Year, comp$log.Species, comp$log.Spatial.extent,
              xlab="Year", ylab="Log # of Species", zlab="Log Spatial Extent (m)",
              ticktype="detailed", theta=-390, phi=15)
dev.off()

#pdf("../figures/3_3dplot2.pdf")
#scatterPlot3d(comp$log.Species, comp$log.Treatment.comb, comp$log.Spatial.extent,
#              xlab="Log # Species",  ylab="Log # Treatment Combinations", zlab="Log Spatial Extent (m)",
#              ticktype="detailed", theta=300, phi=10)
#dev.off()

pdf("../figures/4_3dplot.pdf")
scatterPlot3d(comp$log.Species[which(comp$log.Glob.spatial!="NA")], comp$log.Glob.spatial[which(comp$log.Glob.spatial!="NA")], comp$log.Spatial.grain[which(comp$log.Glob.spatial!="NA")],
              xlab="Log # Species",  ylab="Log Global Spatial Extent", zlab="Log Spatial Grain (m)",
              ticktype="detailed", theta=225, phi=15)
dev.off()

###Comparisons between different habitat types###

#Remove studies across habitats

dat=comp[which(comp$System!="CRRS"),]
dats=dat[which(dat$System!="RSRI"),]
dats$System=factor(dats$System)
#Add an interaction variable for post-hoc tests

dats$int=with(dats, interaction (System, Journal, sep="x"))

#Creating a dummy matrix for false discovery rate post-hoc tests of interaction terms

#K1 <- glht(dur, mcp(System = "Tukey"))$linfct
#K2 <- glht(dur, mcp(Journal = "Tukey"))$linfct
#summary(glht(dur, linfct = rbind(K1, K2)))
tmp <- expand.grid(System = unique(dats$System),
                   Journal = unique(dats$Journal))
X <- model.matrix(~ System * Journal, data = tmp)
#X2 <-model.matrix(~ Journal * System, data = tmp) 
#glht(dur, linfct = X)

#predict(dur, newdata=tmp)

Tukey <- contrMat(table(dats$System), "Tukey")
K1 <- cbind(Tukey, matrix(0, nrow = nrow(Tukey), ncol = ncol(Tukey)))
rownames(K1) <- paste(levels(dats$Journal)[1], rownames(K1), sep = ":")
K2 <- cbind(matrix(0, nrow = nrow(Tukey), ncol = ncol(Tukey)), Tukey)
rownames(K2) <- paste(levels(dats$Journal)[2], rownames(K2), sep = ":")
K <- rbind(K1, K2)
colnames(K) <- c(colnames(Tukey), colnames(Tukey))
#summary(glht(dur, linfct = K %*% X))


#Duration
dur=glm(Duration+1 ~ System*Journal, family=Gamma(link="log"), control=list(maxit=10000), data=dats)
summary(dur)
adur=Anova(dur, type="II", test.statistics="LR")
sumdur1=summary(glht(dur, mcp(System="Tukey"), test = adjusted("fdr")))
sumdur2=summary(glht(dur, mcp(Journal="Tukey"), test = adjusted("fdr")))
sumdur3=summary(glht(dur, linfct = K %*% X), test = adjusted("fdr"))

durcon=confint(glht(dur, mcp(System="Tukey")))
durco=durcon$`confint`
durp=sumdur1$`test`$pvalues
tukdur=cbind(durco, durp)

#dur.int=glm(Duration+1 ~ int, family=Gamma(link="log"), control=list(maxit=10000), data=dats)
#summary(glht(dur.int, mcp(int="Tukey")))

#anova(dur, test="F")
#TukeyHSD(aov(dur), ordered=T)

#Factors

fac=glm(Factors ~ System*Journal, family=quasipoisson (link="log"), data=dats)
summary(fac)
afac=Anova(fac, type="II", test.statistics="LR")
sumtreat1=summary(glht(fac, mcp(System="Tukey"), test = adjusted("fdr")))
sumtreat2=summary(glht(fac, mcp(Journal="Tukey"), test = adjusted("fdr")))
sumtreat3=summary(glht(fac, linfct = K %*% X), test = adjusted("fdr"))


#Treatment combinations
treat=glm(Treatment.comb ~ System*Journal, family=Gamma(link="log"), control=list(maxit=10000), data=dat[which(dats$Treatment.comb!="NA"),])
summary(treat)
atreat=Anova(treat, type="II", test.statistics="LR")
sumtreat1=summary(glht(treat, mcp(System="Tukey"), test = adjusted("fdr")))
sumtreat2=summary(glht(treat, mcp(Journal="Tukey"), test = adjusted("fdr")))
sumtreat3=summary(glht(treat, linfct = K %*% X), test = adjusted("fdr"))


#treat.int=glm(Treatment.comb+1 ~ int, family=Gamma(link="log"), control=list(maxit=10000), data=dats)
#summary(glht(treat.int, mcp(int="Tukey")))

#Species
spec=glm.nb(Species ~ System*Journal, link=log,control=list(maxit=1000, trace=FALSE, epsilon=1e-8), data=dats)
summary(spec)
aspec=Anova(spec, type="II", test.statistics="LR")
sumspec1=summary(glht(spec, mcp(System="Tukey"), test = adjusted("fdr")))
sumspec2=summary(glht(spec, mcp(Journal="Tukey"), test = adjusted("fdr")))
sumspec3=summary(glht(spec, linfct = K %*% X), test = adjusted("fdr"))

spccon=confint(glht(spec, linfct = K %*% X))
spcco=spccon$`confint`
spcp=sumspec3$`test`$pvalues
tukspc=cbind(spcco, spcp)

spccon2=confint(glht(spec, mcp(System="Tukey"), test = adjusted("fdr")))
spcco2=spccon2$`confint`
spcp2=sumspec1$`test`$pvalues
tukspc2=cbind(spcco2, spcp2)

#spec.int=glm.nb(Species+1 ~ int,link="log", data=dats)
#summary(glht(spec.int, mcp(int="Tukey")))

#Spatial grain
spat=glm(Spatial.grain+1 ~ System*Journal, family=Gamma(link="log"), control=list(maxit=10000), data=dats)
summary(spat)
aspat=Anova(spat, type="II", test.statistics="LR")
sumspat1=summary(glht(spat, mcp(System="Tukey"), test = adjusted("fdr")))
sumspat2=summary(glht(spat, mcp(Journal="Tukey"), test = adjusted("fdr")))
sumspat3=summary(glht(spat, linfct = K %*% X), test = adjusted("fdr"))


#spat.int=glm(Spatial.grain+1 ~ int, family=Gamma(link="log"), control=list(maxit=10000), data=dats)
#summary(glht(spat.int, mcp(int="Tukey")))

#Global spatial scale

gspat=glm(Glob.spatial+1 ~ System*Journal, data=dats, family=Gamma(link="log"), control=list(maxit=10000))
summary(gspat)
#agspat=Anova(gspat, type="II", test.statistics="LR")


#Gamma hurdle approach to global spatial
dats$non_zeroGS <- ifelse(dats$Glob.spatial > 0, 1, 0)
m1 <- glm(non_zeroGS ~ System*Journal, data = dats, family = binomial(link = logit))
m2 <- glm(Glob.spatial ~ System*Journal, data = subset(dats, non_zeroGS == 1), 
          family = Gamma(link = log), control=list(maxit=10000))

Anova(m1)
summary(update(m1, .~.-1))

agspat=Anova(m2)
summary(update(m2, .~.-1))

summary(m2)

sumzero1=summary(glht(m1, mcp(System="Tukey"), test = adjusted("fdr")))
sumzero2=summary(glht(m1, mcp(Journal="Tukey"), test = adjusted("fdr")))
sumzero3=summary(glht(m1, linfct = K %*% X), test = adjusted("fdr"))

sumgspat1=summary(glht(gspat, mcp(System="Tukey"), test = adjusted("fdr")))
sumgspat2=summary(glht(gspat, mcp(Journal="Tukey"), test = adjusted("fdr")))
sumgspat3=summary(glht(gspat, linfct = K %*% X), test = adjusted("fdr"))

gscon=confint(glht(gspat, linfct = K %*% X))
gsco=gscon$`confint`
gsp=sumgspat3$`test`$pvalues
tukgs=cbind(gsco, gsp)

#Spatial extent

sext=glm(Spatial.extent+1 ~ System*Journal, family=Gamma(link="log"), control=list(maxit=10000), data=dats)
summary(sext)
asext=Anova(sext, type="II", test.statistics="LR")
sumsext1=summary(glht(sext, mcp(System="Tukey"), test = adjusted("fdr")))
sumsext2=summary(glht(sext, mcp(Journal="Tukey"), test = adjusted("fdr")))
sumsext3=summary(glht(sext, linfct = K %*% X), test = adjusted("fdr"))

sxcon=confint(glht(sext, linfct = K %*% X))
sxco=sxcon$`confint`
sxp=sumsext3$`test`$pvalues
tuksx=cbind(sxco, sxp)

#write.csv(sumsext3, file="spatial_extent.csv")

#sext.int=glm(Spatial.extent+1 ~ int, family=Gamma(link="log"), data=dats)
#summary(glht(sext.int, mcp(int="Tukey")))

#Chi-square table results

write.csv(print(rbind(adur, aspat, asext, agspat, aspec, atreat)), file="anovas.csv")

sumdur1$`test`$pvalues
sumdur1$`pvalues`


#False discovery rate post-hoc comparisons for relevant contrasts

rbind(tukdur, tuksx, tukgs, tukspc2, tukspc )
write.csv(rbind(tukdur, tuksx, tukgs, tukspc2, tukspc), file="finaltukey.csv")


###Mean and SD values for manuscript###
spat=tapply(dats$Spatial.extent, list(Journal=dats$Journal, System=dats$System), mean, na.rm=TRUE)
glob=tapply(dats$Glob.spatial, list(Journal=dats$Journal, System=dats$System), mean, na.rm=TRUE)
grain=tapply(dats$Spatial.grain, list(Journal=dats$Journal, System=dats$System), mean, na.rm=TRUE)
dur=tapply(dats$Duration, list(Journal=dats$Journal, System=dats$System), mean, na.rm=TRUE)
sp=tapply(dats$Species, list(Journal=dats$Journal, System=dats$System), mean, na.rm=TRUE)
treat=tapply(dats$Treatment.comb, list(Journal=dats$Journal, System=dats$System), mean, na.rm=TRUE)
fac=tapply(dats$Factors, list(Journal=dats$Journal, System=dats$System), mean, na.rm=TRUE)

allmeans=print(rbind(spat, glob, grain, dur, sp, treat, fac))

spat2=tapply(dats$Spatial.extent, list(Journal=dats$Journal, System=dats$System), sd, na.rm=TRUE)
glob2=tapply(dats$Glob.spatial, list(Journal=dats$Journal, System=dats$System), sd, na.rm=TRUE)
grain2=tapply(dats$Spatial.grain, list(Journal=dats$Journal, System=dats$System), sd, na.rm=TRUE)
dur2=tapply(dats$Duration, list(Journal=dats$Journal, System=dats$System), sd, na.rm=TRUE)
sp2=tapply(dats$Species, list(Journal=dats$Journal, System=dats$System), sd, na.rm=TRUE)
treat2=tapply(dats$Treatment.comb, list(Journal=dats$Journal, System=dats$System), sd, na.rm=TRUE)
fac2=tapply(dats$Factors, list(Journal=dats$Journal, System=dats$System), sd, na.rm=TRUE)

allsds=print(rbind(spat2, glob2, grain2, dur2, sp2, treat2, fac2))

#Mean and standard error values by year block for manuscript table

ymeans <- ddply(comp, .(Year.block), summarise, 
                Glob.spatial = mean(na.omit(Glob.spatial)),
                Spatial.extent = mean(Spatial.extent),
                Spatial.grain = mean(Spatial.grain),
                Duration = mean(Duration),
                Species = mean(Species),
                Factors= mean(Factors),
                Treatment.comb = mean(na.omit(Treatment.comb)),
                N = length(Year.block)
)   

serrs <- ddply(dat, .(Year.block), summarise, 
               Glob.spatial = sd(na.omit(Glob.spatial))/sqrt(length(na.omit(Glob.spatial))),
               Spatial.extent = sd(Spatial.extent)/sqrt(length(Spatial.extent)),
               Spatial.grain = sd(Spatial.grain)/sqrt(length(Spatial.grain)),
               Duration = sd(Duration)/sqrt(length(Duration)),
               Species = sd(Species)/sqrt(length(Species)),
               Factors= sd(Factors)/sqrt(length(Factors)),
               Treatment.comb = sd(na.omit(Treatment.comb))/sqrt(length(na.omit(Treatment.comb)))
)   

write.csv(ymeans, file="meanvals.csv")
write.csv(serrs, file="serrvals.csv")

#Site replication for manuscript table

table1=ddply(comp, .(Year.block), summarise, 
             Replication = length(Year.block[which(Glob.spatial>0)]),
             N = length(Year.block),
             Percent= Replication/N      
)   

write.csv(table1, file="replication.csv")
#Plot mean and SE values for each variable against habitat type and journal

#condense dataset for ease of plotting

#Make two plots, one for each journal
massmelt=melt(dats, id.vars=c("Year.block","Journal", "System"), measure.vars=c("log.Treatment.comb", "log.Species", "log.Duration", "log.Spatial.grain", "log.Glob.spatial", "log.Spatial.extent"))

#Make a plot for all journals combined

massmelt=melt(dats, id.vars=c("Year.block", "System"), measure.vars=c("log.Treatment.comb", "log.Species", "log.Duration", "log.Spatial.grain", "log.Glob.spatial", "log.Spatial.extent"))

#Plot for true values

massmelt=melt(dats, id.vars=c("Year.block", "System", "Journal"), measure.vars=c("Duration", "Spatial.grain", "Spatial.extent", "Glob.spatial", "Species", "Factors", "Treatment.comb"))

head(massmelt)

#Remove problematic entries for cross-system comparisons and one study where treamtent combinations were not available

massmelt=massmelt[which(massmelt$value!="NA"),]


means.sem=ddply(massmelt, c("System", "variable", "Journal"), summarise,mean=mean(value), sem=sd(value)/sqrt(length(value)))
means.sem <- transform(means.sem, lower=mean-sem, upper=mean+sem)

bmeans.barplot <- qplot(x=Journal, y=mean, colour=variable, fill=System, data=means.sem, geom="bar",  stat="identity", position="dodge", xlab="", ylab="Mean +/- SE", main="") + facet_wrap(~variable, scales="free_y", ncol=2, nrow=4) + scale_fill_grey(start = 0, end = .95) +theme_bw()+ geom_errorbar(aes(ymax=upper, ymin=lower), position=position_dodge(0.9), data=means.sem)+scale_color_grey(end=0)

pdf(file="Figure 6.pdf")
bmeans.barplot
dev.off()

levels(compMelt$variable) <- gsub("Spatial.grain", "Spatial Grain", levels(compMelt$variable))
levels(compMelt$variable) <- gsub("Glob.spatial", "Global Spatial Extent", levels(compMelt$variable))
levels(compMelt$variable) <- gsub("comb", "Combinations", levels(compMelt$variable))
levels(compMelt$variable) <- gsub("\\.", " ", levels(compMelt$variable))

######## A function to generate predicted values from quantile 
# regressions at different levels of Tau
quantRegLines.values <- function(rq_obj, lincol="red", logTrans=T, se="rank", 
                                 lwd=1, lty=1,
                                 ns=T, lwd.ns = 0.5, lty.ns = 2, alpha=0.05, ...){  
  #get the taus
  taus <- rq_obj$tau
  lwd=rep(lwd, length(rq_obj$tau))
  lty=rep(lty, length(rq_obj$tau))
  
  #get the p-values
  if(se != "rank"){
    p <- sapply(summary(rq_obj, se=se), function(adf) coef(adf)[2,4])
  }else{
    p <- as.numeric(sapply(summary(rq_obj, se="rank"), function(adf) 
      0 == sign(coef(adf)[2,2])+sign(coef(adf)[2,3])
    ))
    
  }
  if(ns){
    nsVals <- which(p>0.05)
    
    lwd[nsVals] <- lwd.ns
    lty[nsVals] <- lty.ns
    
  }
  #get x
  x <- rq_obj$x[,2] #assumes no intercept
  xx <- seq(min(x, na.rm=T),max(x, na.rm=T),1)
  
  #calculate y over all taus
  f <- coef(rq_obj)  
  yy <- cbind(1,xx)%*%f
  if(logTrans) yy  <- exp(yy)-1
  if(length(lincol)==1) lincol=rep(lincol, length(taus))
  
  yy <- as.data.frame(yy)
  yy$x <- xx
  yyMelt <- melt(yy, "x", variable.name = "tau")
  yyMelt$lwd <- as.vector(sapply(lwd, function(l) rep(l, length(xx))))
  yyMelt$lty <- as.vector(sapply(lty, function(l) rep(l, length(xx))))
  yyMelt$p <- as.vector(sapply(p, function(l) rep(l, length(xx))))
  yyMelt$tau <- gsub("tau\\=", "", yyMelt$tau)
  #geom_line(data=yyMelt, mapping=aes(x=x, y=value), lwd=yyMelt$lwd, lty=yyMelt$lty)
  #plot all lines
  #  for(i in 1:length(taus)){
  #    lines(xx,yy[,i], col=lincol[i], lwd=lwd[i], lty=lty[i], ...)
  #  }
  yyMelt
}


###########
#GGPLOT2 the biplots with quantile fits

taus <- seq(0.05, 0.95, .1)

rq_lines <- dlply(compMelt, .(variable), function(subDF) {
  print(subDF$variable[1])
  subDF <- na.omit(subDF)
  
  rq_fit <- rq(log(value+1)~(Year),data=subDF, tau=taus)
  
  #  quantRegLines(rq_fit, lincol=rainbow(length(taus)))
  
  rq_fit
})

names(rq_lines) <- levels(compMelt)

rq_curves <- ldply(rq_lines, quantRegLines.values, se="ker")
rq_curves$ns <- ifelse(rq_curves$p>0.05,"No","Yes")


rq_biplots <- ggplot(data=compMelt, mapping=aes(x=Year, y=value)) +
  geom_point(alpha=0.5) +
  facet_wrap(~variable, scale="free_y") +
  geom_line(data=rq_curves, mapping=aes(x=x, y=value, color=tau, lty=ns, lwd=ns)) +
  scale_color_discrete(name = "Quantile (tau)") +
  scale_size_manual(name = "Different from 0", values=c(.5,1)) +
  scale_linetype_manual(name = "Different from 0", values=c(2,1)) +
  theme_bw(base_size=17, base_family="Helvetica")

pdf("../figures/quantreg_biplots.pdf", width=12, height=8)
rq_biplots
dev.off()

#A Function to extract confidence bands
getBand <- function(rq_obj, se="ker"){
  out <- lapply(summary(rq_obj, se=se), function(adf) coef(adf)[2,])
  names(out) <- rq_obj$tau
  out <- ldply(out)
  names(out)[1] <- "tau"
  out
}

#write out the coefficients and t-tests for these values of tau
tab <- ldply(rq_lines, getBand)
write.csv(tab, "../figures/quanreg_table.csv")


########Coefficients and CIs across a wide range of tau values
#subDF <- subset(compMelt, compMelt$variable=="Glob.spatial") #debug

#many taus
taus2 <- seq(0.05, 0.95, length.out=25)

#make some fit rq objects
rq_lines2 <- dlply(compMelt, .(variable), function(subDF) {
  print(subDF$variable[1])
  subDF <- na.omit(subDF)
  
  rq_fit <- rq(log(value+1)~(Year),data=subDF, tau=taus2)
  rq_fit
})
#########
#ggplot coefs and CIs across a wide range of tau values
#########

tab2 <- ldply(rq_lines2, function(adf) getBand(adf, se="rank"))
names(tab2) <- gsub(" ", "\\.", names(tab2))
tab2$tau <- as.numeric(tab2$tau) #need to take it back to a numeric
#get rid of anomolous bad CI
tab2$lower.bd[which(tab2$lower.bd < -1e300)] <- -Inf
coefPlot <- ggplot(data=tab2, mapping=aes(x=tau, y=coefficients, ymin=lower.bd, ymax=upper.bd)) +
  facet_wrap(~variable, scale="free_y") +
  geom_ribbon(fill="grey", color=NA) +
  geom_point() +
  theme_bw(base_size=17, base_family="Helvetica")+
  geom_abline(slop=0, intercept=0, lty=2) +
  ylab("Coefficient") + xlab("Quantile (tau)") +
  scale_x_continuous(breaks=seq(0.1,1,length.out=10))

pdf("../figures/quantreg_coefplots.pdf", width=12, height=8)
coefPlot
dev.off()