#############NCAP code (adapted from Millar et al., 2005)################################################# #Source code #NCAP program for multiple environmental variables ########################################################################### gradient.choice=function(type="vonB") { gradientfunction=switch(type, vonB={function(b,x) return( pmax(0,(1-exp(-x%*%b))) )}, hyperbolic={function(b,x) return( pmax(0,x%*%b/(1+x%*%b)) )}, logistic={function(b,x) return( exp(x%*%b)/(1+exp(x%*%b)) )}, shifted.logistic={function(b,x) return( exp(b[1]+x%*%b[-1])/(1+exp(b[1]+x%*%b[-1])) )}, stop("\n","Unrecognized gradient", "\n","Must be one of: vonB, hyperbolic, logistic, shifted.logistic","\n") ) return(gradientfunction) } #Old.Model uses X=model("logTHCp1") specification #Old.model=function(charformula) { # return( as.matrix(model.matrix(as.formula(paste("~",charformula)))[,-1]) ) } model=function(formula.spec,fixed.intercept=T) { if(fixed.intercept) X=as.matrix(model.matrix(formula.spec)[,-1]) else { cat("\n CAUTION: Returning design matrix for intercept term in", "linear predictor.","\n","This is NOT VALID for the vonB gradient","\n") X=as.matrix(model.matrix(formula.spec)) } attr(X,"fixed.intercept")=fixed.intercept return(X) } NLCCor=function(b,x,Q,gradient,blow=NULL,bhigh=NULL,pwgt=0.001) { y=gradient(b,x) #y=y+0.001*(-1)^(1:length(y)) #Jiggle to avoid numerical problems penalty=0 if(!is.null(blow) & !is.null(bhigh)) penalty=pwgt*sum((pmax(0,b-bhigh)+pmin(0,b-blow))^2) fit1=lm(y~Q) return(-summary(fit1)$r.squared+penalty) } NLCCorSeq=function(b0,x,Q,grad,...) { npar=length(b0) fitseq=matrix(0,nrow=ncol(Q),ncol=npar+2) n=nrow(Q) best.m=0; best.rsq=0 for(npco in 1:ncol(Q)) { target.rsq=1-exp(log(1-best.rsq)-(npco-best.m)*max(2,log(n))/n) fitseq[npco,npar+2]=target.rsq MaxNLCor=nlm(NLCCor,b0,x=x,Q=Q[,1:npco],gradient=grad,iterlim=500,...) b=MaxNLCor$est fitseq[npco,1:npar]=b fitseq[npco,npar+1]=-MaxNLCor$min if(fitseq[npco,npar+1]>target.rsq) {best.m=npco; best.rsq=fitseq[npco,npar+1]} } return(fitseq) } #Example of use #NLCCorSeq(0.01,x=X,Q,gradient,blow=0.0,bhigh=1) plot.NCAP=function(b,x,Q,gradient,resids=T,rescale=T,xpts=NULL,...) { y=gradient(b,x) fit=lm(y~Q) yfit=fit$fit; resid=fit$resid if(rescale) { gfit=lm(yfit~y) resid=gfit$resid/gfit$coef[2] yfit=y+resid #Reverse role of y and yfit } if (interactive() & ncol(x)==1) { if(is.null(xpts)) xpts=as.matrix(seq(min(x),max(x),length=100)) plot(xpts,gradient(b,as.matrix(xpts)),type="l",ylab="Gradient",las=1,...) points(x,yfit) } if (ncol(x)==2 & !attr(x,"fixed.intercept")) { x=as.matrix(x[,2]) if(!is.null(xpts)) xpts=cbind(1,xpts) else xpts=cbind(1,as.matrix(seq(min(x),max(x),length=100))) plot(xpts[,2],gradient(b,xpts),type="l",ylab="Gradient",las=1,...) points(x,yfit,ylab="Gradient",...) } if(interactive() & resids) { cat("\nType for residual plot") readline() plot(y,resid,xlab="Gradient",ylab="Residual",las=1) abline(h=0) } } BootNLCor=function(b0,x,D,npco,gradient,nsamps=100,coverage=0.95,...) { npar=length(b0) bootsumm=matrix(0,nrow=nsamps,ncol=npar+3) for(iter in 1:nsamps) { resampled=sample(1:nrow(x),replace=T) if(round(iter/1)==iter/1) dput(list(iter=iter,resampled=resampled),file="BootProgress.txt") bootx=as.matrix(x[resampled,]); bootD=D[resampled,resampled] bootQ=pco(bootD,varplot=F)$vec[,1:npco] bootfit=nlm(NLCCor,b0,x=bootx,Q=bootQ,gradient=gradient,iterlim=500,...) LinCor=cancor(bootx,bootQ)$cor^2 bootsumm[iter,]=c(bootfit$est,-bootfit$min,bootfit$code,LinCor) } codecount=sum(bootsumm[,ncol(bootsumm)]>2) if(codecount>0) cat("\nWARNING:",codecount,"fits may not have converged \n") CIsamps=round(nsamps*(c((1-coverage)/2,1-(1-coverage)/2))) if(CIsamps[1]>0){ cat("\n",100*coverage,"% bootstrap percentile CI",ifelse(npar<2,"is","are"),"\n") write(signif(apply(as.matrix(bootsumm[,1:npar]),2,sort)[CIsamps,],3),"",ncol=2) } attr(bootsumm,"m")=npco return(bootsumm) } PermNLCor=function(b0,x,D,npco,gradient,nsamps=100,...) { npar=length(b0) Q=pco(D,varplot=F)$vec[,1:npco] observedfit=nlm(NLCCor,b0,x=x,Q=Q,gradient=gradient,iterlim=500,...) observedF=(-observedfit$min-cancor(x,Q)$cor^2)/(1+observedfit$min) cat("\nObserved NCAP correlation and F-stat are", -observedfit$min,"and",observedF,"\n") permsumm=matrix(0,nrow=nsamps,ncol=npar+3) for(iter in 1:nsamps) { resampled=sample(1:nrow(x),replace=F) if(round(iter/1)==iter/1) dput(list(iter=iter,resampled=resampled),file="PermProgress.txt") permx=as.matrix(x[resampled,]) permfit=nlm(NLCCor,b0,x=permx,Q=Q,gradient=gradient,iterlim=500,...) LinCor=cancor(permx,Q)$cor^2 permsumm[iter,]=c(permfit$est,-permfit$min,permfit$code,LinCor) } cor.pval=1-(rank(c(-observedfit$min,permsumm[,npar+1]))[1]-1)/(nsamps+1) F.rank=rank(c(observedF,(permsumm[,npar+1]-permsumm[,npar+3])/(1-permsumm[,npar+1]))) F.pval=1-(F.rank[1]-1)/(nsamps+1) cat("\nNull model permutational p-value is",cor.pval, "\nLinear model hypothesis p-value is",F.pval,"\n") attr(permsumm,"m")=npco return(permsumm) } ##########################Executable NCAP code##################################################### 2000s.df= read.table('2000s.txt', sep="") Dist.df =read.table ('2000sfactors.txt',header=T,sep=""); attach (Dist.df) D=distance(t(as.matrix(2000s.df)),measure="BC", trans="squareroot") pcoD=pco(D) gradient=gradient.choice("logistic") X=model(~Distances) npar=ncol(X) b0=rep(0.2,npar) par(mfrow=c(2,3),mar=c(4,5,2,3)) fitseq=NLCCorSeq(b0,X,pcoD,gradient,m=20,stat="Rsq",plots=T,blow=0.0,bhigh=1,pwgt=0) fitseq2=NLCCorSeq(b0,X,pcoD,gradient,m=20,stat="RDA",plots=T,blow=0.0,bhigh=1,pwgt=0) m=2 criterion="Rsq" CCr2=LinCCor(X,pcoD,m,stat=criterion) NLCC=nlm(NLCCor,b0,X=X,pcoD=pcoD,gradient=gradient,m=m,stat=criterion, print.level=1,iterlim=500,typsize=b0,blow=rep(0,npar),bhigh=rep(1,npar),pwgt=1) cat("\nMaximized",criterion,"statistic is",-NLCC$min,"\n") bhat=NLCC$est cat("\nPseudo F-stat for nonlinear vs linear gradient is", (-NLCC$min-CCr2)/(1+NLCC$min),"\n") par(mfrow=c(2,1),mar=c(4,5,2,3)) plot.NCAP(NLCC$est,X,pcoD,gradient=gradient,m=m,stat=criterion, xpts=-20:100,ylim=c(-0.5,1.5)) nsamps=1000 #1000 samples takes about 4 minutes on a 2GHz desktop boot=BootNLCor(b0,X=X,D=D,gradient,m=m,stat=criterion,nsamps=nsamps,coverage=0.9, typsize=bhat,blow=rep(0.01*bhat,npar),bhigh=rep(20*bhat,npar),pwgt=1000) nsamps=999 #999 perms takes about 12 minutes on a 2GHz desktop perm=PermNLCor(b0,X=X,D=D,gradient,m=m,stat=criterion,nsamps=nsamps, typsize=bhat,blow=rep(0.01*bhat,npar),bhigh=rep(10*bhat,npar),pwgt=1000) par(cex=1.2) plot.NCAP(NLCC$est,X,pcoD,gradient=gradient,m=m,stat=criterion, xlab="Distance from edge (m)", xlim=range(Distances),xaxt="n", ylim=c(-0.5,1.5), resids=F) axis(1,at=Distances,lab=Distances) ##############################Gradient forests code (adpated from the Pitcher et al., 2011)Packages 'Etended forest' and 'Gradient forests' required"############################# #first do spearmen coorelations (package Hmisc) core1d=rcorr(e1d, type='spearman') core1d= cor(e1d, use="pairwise.complete.ob", method="spearman") pairs(core1d) corrgram(core1d) nSites <- dim(Finale3)[1] nSpecs <- dim(b3)[2] lev <- floor(log2(nSites * 0.368/2)) lev #or lev=1 # find species with 5 distinct abundances distinct.GT5 <- apply(b3,2,function(x){length(unique(x))}) > 5 sum(distinct.GT5) # how many species # select species with adequate frequency b3a <- b3[,distinct.GT5] #Run gradient forests gf <- gradientForest(cbind(Finale2, b2),predictor.vars = colnames(Finale2), response.vars = colnames(b2),ntree = 1000, transform = NULL, compact = T,nbin = 201, maxLevel = 1, corr.threshold = 0.7) gf names(gf) plot(gf, plot.type = "O") most_important <- names(importance(gf))#this has to be the number of predictor variables par(mgp = c(2, 0.75, 0)) plot(gf, plot.type = "S", imp.vars = most_important,leg.posn = "topright", cex.legend = 0.4, cex.axis = 0.6,cex.lab = 0.7, line.ylab = 0.9, par.args = list(mgp = c(1.5,0.5, 0), mar = c(1.1, 1.5, 0.1, 1))) plot(gf, plot.type ="C", imp.vars=most_important, show.overall=F, legend=T, leg.posn="topleft", leg.nspecies=5, cex.lab=0.7, cex.legend=0.4, cex.axis=0.6, line.ylab=0.9, par.args=list(mgp=c(1.5, 0.5, 0), mar=c(2.5,1.0,0.1,0.5), omi=c(0,0.3,0,0))) plot(gf, plot.type = "C", imp.vars = most_important,show.species = F, common.scale = T, cex.axis = 0.6,cex.lab = 0.7, line.ylab = 0.9, par.args = list(mgp = c(1.5,0.5, 0), mar = c(2.5, 1, 0.1, 0.5), omi = c(0,0.3, 0, 0))) plot(gf, plot.type = "P", show.names = F, horizontal = F, cex.axis = 1, cex.labels = 0.7, line = 2.5) plot(gf, plot.type = "P", show.names = T, horizontal = F,cex.axis = 1, cex.labels = 0.7, line = 2.5) plot(gf, plot.type ="C", imp.vars=most_important, show.overall=F, legend=T, leg.posn="topleft", leg.nspecies=10, cex.lab=0.7, cex.legend=0.4, cex.axis=0.6, line.ylab=0.9, par.args=list(mgp=c(1.5, 0.5, 0), mar=c(2.5,1.0,0.1,0.5), omi=c(0,0.3,0,0)))