`dat4bugsCOV` <- function (datafile, tstep = 1, loess_fill=FALSE, ...) { # # Function dat4bugsCOV - Prepares and writes input data for DCRWSmeta_cov model # Arguments: # datafile - An R list of [[1]] Argos tracking data and [[2]] covariate data. # See example.R for details on structure. # tstep - The time step to be assumed for the state-space model, specified in days. # loess_fill - default FALSE. Set to TRUE if covariate data contains missing values. tstep.sec <- tstep * 86400; tstep.hr <- tstep * 24 # timesteps # loads data as a list, where 1st item is locations, 2nd item is covariate data dat <- datafile[[1]] # add a class Z repeat of first location to make the start time regular start.z <- function(k) { k <- rbind(k[1, ], k) # insert a repeat of 1st pos k$lc[1] <- "Z" date1 <- trunc(k$time[1], units = "days") + c(0:c(24/tstep.hr))*tstep.hr*60*60 k$time[1] <- date1[which(date1>k$time[1])[1]-1] k} dat2 <- by(dat, dat$id, start.z) # rearrange data back to original format restack <- function(x) { for (i in 1:length(x)) { if (i==1) {x2 <-x[[1]]} if (i>1) {x2 <- rbind(x2,x[[i]])} } x2} dat <- restack(dat2) # organize the ARGOS location class errors sigma.lon <- c(0.289866, 0.3119293, 0.9020423, 2.1625936, 0.507292, 4.2050261, 4.2050261) # Z class ~ B class sigma.lat <- c(0.1220553, 0.2605126, 0.4603374, 1.607056, 0.5105468, 3.041276, 3.041276) nu.lon <- c(3.070609, 1.220822, 2.298819, 0.9136517, 0.786954, 1.079216, 1.079216) nu.lat <- c(2.075642, 6.314726, 3.896554, 1.010729, 1.057779, 1.331283, 1.331283) nu.lon <- nu.lon[as.numeric(dat$lc)] nu.lat <- nu.lat[as.numeric(dat$lc)] sigma.lon <- (sigma.lon/6366.71 * 180)/pi sigma.lat <- (sigma.lat/6366.71 * 180)/pi itau2.lon <- sigma.lon[as.numeric(dat$lc)]^-2 itau2.lat <- sigma.lat[as.numeric(dat$lc)]^-2 dat$itau2.lon <- itau2.lon dat$itau2.lat <- itau2.lat dat$nu.lon <- nu.lon dat$nu.lat <- nu.lat #=========================================================================== dostep <- function(k) { ### FUNCTION for processing the ARGOS locations tt <- k$time tst.nona <- cut(tt, paste(tstep.sec, "sec"), labels = FALSE, incl = TRUE) # bin data by selected timestep, starting from 1st timestamp tst.seq <- seq(1, max(tst.nona)) # create full time bin sequence tst.rle <- rle(tst.nona) # calc # data values per bin tst.val <- tst.rle$values # names of actual bins containing data tst.isna <- !tst.seq %in% tst.val # ID bins lacking data k.idx <- tst.all <- sort(c(tst.nona, tst.seq[tst.isna])) # inserts a bin value for empty bins in data sequence tstall.rle <- rle(tst.all) # calc # data values per bin (all bins) idx <- cumsum(c(1, tstall.rle$lengths)) # creates pointer for where bin value changes in data sequence steplims <- seq(tt[1], by = paste(tstep.sec, "sec"), length = length(idx)) # creates regularised date sequence, starting from 1st timestamp tstall.isna <- tst.all %in% tst.seq[tst.isna] # ID bins lacking data (all bins) k.idx[tstall.isna] <- NA # insert NAs for empty bins k.idx[!tstall.isna] <- seq(nrow(k)) # now k.idx contains the original data row numbers plus some NAs k.new <- k[k.idx, ] # now k.new contains the original data plus some NAs where there are date bins empty of data step.frac <- as.numeric(difftime(k.new$time, steplims[tst.all], units = "sec"))/tstep.sec # calc the time fraction a data point is different from its date bin step.frac[is.na(step.frac)] <- 0.5 # empty date bin set midway itau2lon.isna <- is.na(k.new$itau2.lon) itau2lat.isna <- is.na(k.new$itau2.lat) k.new$itau2.lon[itau2lon.isna] <- min(k.new$itau2.lon, na.rm = TRUE) # set dates empty of data to min tau value k.new$itau2.lat[itau2lat.isna] <- min(k.new$itau2.lat, na.rm = TRUE) k.new$nu.lon[k.new$nu.lon < 2 | is.na(k.new$nu.lon)] <- 2 # set dates empty of data to nu=2 (also set this as min nu) k.new$nu.lat[k.new$nu.lat < 2 | is.na(k.new$nu.lat)] <- 2 # return data list with taus, nus, date indexes (idx), weight (j),number of regularised positions etc out <- list(id = k.new$id[1], y = cbind(k.new$lon, k.new$lat), itau2 = cbind(k.new$itau2.lon, k.new$itau2.lat), nu = cbind(k.new$nu.lon, k.new$nu.lat), idx = idx, j = step.frac, RegN = length(idx), first.date = k.new$time[1], tstep = tstep) out } # end dostep function ARGOS <- by(dat, dat$id, dostep) # does 'dostep' function per animal #=========================================================================== docovar <- function(k) { ### FUNCTION for processing the covariate list tt <- k$time # bin data by selected timestep, starting from ARGOS 1st timestamp datebins <- seq(k$fd[1],by=paste(tstep.sec, "sec"),length.out = k$RegN[1]+1) tst.nona <- cut(tt, datebins, labels = FALSE, incl = TRUE) tst.seq <- seq(1, length(datebins)-1) # create full time bin sequence tst.rle <- rle(tst.nona) # calc # data values per bin tst.val <- tst.rle$values # names of actual bins containing data tst.isna <- !tst.seq %in% tst.val # ID bins lacking data k.idx <- tst.all <- sort(c(tst.nona, tst.seq[tst.isna])) # inserts a bin value for empty bins in data sequence tstall.rle <- rle(tst.all) # calc # data values per bin (all bins) idx <- cumsum(c(1, tstall.rle$lengths)) # creates pointer for where bin value changes in data sequence steplims <- seq(k$fd[1], by = paste(tstep.sec, "sec"), length = length(idx)) # creates regularised date sequence, starting from 1st timestamp tstall.isna <- tst.all %in% tst.seq[tst.isna] # ID bins lacking data (all bins) k.idx[tstall.isna] <- NA # insert NAs for empty bins k.idx[!tstall.isna] <- seq(nrow(k)) # now k.idx contains the original data row numbers plus some NAs k.new <- k[k.idx, ] # now k.new contains the original data plus some NAs where there are date bins empty of data step.frac <- as.numeric(difftime(k.new$time, steplims[tst.all], units = "sec"))/tstep.sec # calc the time fraction a data point is different from its date bin step.frac[is.na(step.frac)] <- 0.5 # empty date bin set midway ncols <- 3:(ncol(k.new)-3) ; # may (later) have more than 1 covar in this dataset for (icol in 1:length(ncols)) { tmp <- as.vector(tapply(k.new[,ncols[icol]],tst.all, mean, na.rm=T)) if (icol==1) {covar.mn <-matrix(NA,nrow=length(tmp),ncol=length(ncols));covar.sd<-covar.mn;} covar.mn[,icol]<- tmp # take the mean covariate value covar.sd[,icol]<- as.vector(tapply(k.new[,ncols[icol]],tst.all, sd, na.rm=T)) # carry the sd } covar.mn[is.nan(covar.mn)] <- NA; covar.sd[is.nan(covar.sd)] <- NA list(covar=covar.mn, covar.sd=covar.sd, tstep = tstep) } # end docovar function # apply the docovar function per animal dat <- datafile[[2]] # covariate datafile dat$fd <- dat$time; dat$ld <- dat$time; dat$RegN <- NA # 1st timestamp has to be from the argos file # allocate the first&last ARGOS timestamp to pin datebins against for (i in 1:length(names(ARGOS))) { # each animal ii <- which(dat$id==names(ARGOS[i])) dat$fd[ii] <- ARGOS[[i]]$first.date dat$ld[ii] <- ARGOS[[i]]$first.date +(ARGOS[[i]]$RegN-1)*tstep.sec dat$RegN[ii] <- ARGOS[[i]]$RegN } dat <- dat[ dat$time >= dat$fd & dat$time < dat$ld+tstep.sec,] # only require covars where have argos covars <- by(dat, dat$id, docovar) # need the covars not in their seperate list, but to be within the ARGOS data... # here return each covar as a column in a matrix called covar for each animal/element in the ARGOS list for (i in 1:length(ARGOS)) { # each animal covar_all <- covars[[i]]$covar; covar_all_sd <- covars[[i]]$covar.sd; ARGOS[[i]]$covar <- covar_all; ARGOS[[i]]$covar.sd <- covar_all_sd # loess fit to predict missing values ARGOS[[i]]$covar.prec <- ARGOS[[i]]$covar*NA if (loess_fill) { n<-nrow(ARGOS[[i]]$covar) for (ii in 1:ncol(ARGOS[[i]]$covar)) { # each covariate tmp <- fit.loess(ARGOS[[i]]$covar[,ii]) ARGOS[[i]]$covar[,ii] <- tmp$covar ARGOS[[i]]$covar.prec[,ii] <- tmp$covar.prec } } # loess loop } # each animal #=========================================================================== ARGOS # return the data } # end dat4bugsCOV function #=============================================================================== # fit.loess function called within docovar function # apply loess smoother to fill if (few) missing values occur within covariate data at the model timestep fit.loess <- function(xdat,plot_loess=FALSE) { n<-length(xdat) ff <- which(!is.na(xdat)) fitrange <- max(1,min(ff)-1):min(max(ff)+1,n) usepred <- c(1:n)[-ff] loess_prec<- rep(10000,n) loess_pts <- NA*usepred #noise <- rnorm(n, mean = 0, sd = 2.5*(sd(xdat,na.rm=TRUE)/sqrt(length(ff)))) # add noise=2.5*SE (too small) noise <- rnorm(n, mean = 0, sd = 0.5*sd(xdat,na.rm=TRUE)) # add noise=0.5*SD # fine loess uses 7days data fit_fine <- loess(xdat[ff]~ff, span=(7/tstep)/length(ff),degree=2, control = loess.control(surface = "direct")) pred_fine <- predict(fit_fine,1:n,se=TRUE) # rough loess uses 28days data fit_rough <- loess(xdat[ff]~ff, span=(28/tstep)/length(ff),degree=2, control = loess.control(surface = "direct")) pred_rough <- predict(fit_rough,1:n,se=TRUE) loess_pts <- pred_fine$fit[usepred] + noise[usepred] loess_prec[usepred] <- 1/((pred_fine$se.fit[usepred])^2) # pts near edges of data filled with 28d smoother (instead of prev. pad below edges <- usepred <7 | usepred > (n-7) loess_pts[edges] <- pred_rough$fit[usepred[edges]] + noise[usepred[edges]] loess_prec[usepred[edges]] <- 1/((pred_rough$se.fit[usepred[edges]])^2) # insert the loess values where data missing, with noise, but keep within obs limits rn_l <- range(loess_pts); rn_xdat <- range(xdat,na.rm=TRUE); if (rn_l[2] > rn_xdat[2] ) {loess_pts[loess_pts> rn_xdat[2] ] <- quantile(xdat,0.99,na.rm=TRUE) } if (rn_l[1] < rn_xdat[1] ) {loess_pts[loess_pts< rn_xdat[1] ] <- quantile(xdat,0.01,na.rm=TRUE) } xdat[usepred] <- loess_pts if (plot_loess) { dev.new() plot((1:n)[-usepred],xdat[-usepred],xlim=c(0,n+1),ylab="data",xlab="timestep") # original covar data points((1:n),pred_fine$fit,col="gray") # fine loess points((1:n),pred_rough$fit,col="pink") # rough loess points((1:n)[usepred],xdat[usepred],col="red") # loess-filled points } list(covar=xdat, covar.prec=loess_prec) # return the filled data and the precision estimate } # end loess function #===============================================================================