#Code for eBird migration project
# (c) 2013 -2014 Sarah Supp 

library(ggmap)
library(maptools)
library(fields)
library(sp)
library(raster)
library(maps)
library(mapdata)
library(rgdal)
library(raster)
library(gamm4)

#set working directory
main = "C:/Users/sarah/Dropbox/ActiveResearchProjects/Hummingbird_eBirdMigration"
figpath = "C:/Users/sarah/Dropbox/ActiveResearchProjects/Hummingbird_eBirdMigration/Figures"
gitpath = "C:/Users/sarah/Documents/GitHub/hb-migration"
wd = "C:/Users/sarah/Dropbox/ActiveResearchProjects/Hummingbird_eBirdMigration/data"
setwd(wd)


#---------------------------------------------------------------------------------------
#              predict migration paths, dates, speed, and error across years
#---------------------------------------------------------------------------------------

# read in summary of effort data (Number of eBird checklists submitted per day per year)
effort = read.table("FAL_hummingbird_data/checklist_12_2004-2013wh_grp.txt", header=TRUE, as.is=TRUE)

# read in country outline shp files
USAborder = readShapePoly("borders/USA_adm/USA_adm0.shp")
Mexborder = readShapePoly("borders/MEX_adm/MEX_adm0.shp")
Canborder = readShapePoly("borders/CAN_adm/CAN_adm0.shp")

# read in the altitude layers, make rasters
elev = raster("alt_5m_bil/alt.bil")

# plot elev + map for extent
myext <- c(-175, -50, 15, 75)
plot.new()
#plot(elev, ext = myext, xlab="Longitude", ylab = "Latitude", xlim = c(-175,-50), ylim = c(15,75), col=gray(0:256/256))

borders <- function(){
  plot(USAborder, ext=myext, border="black", add=TRUE)
  plot(Mexborder, ext=myext, border="black", add=TRUE)
  plot(Canborder, ext=myext, border="black", add=TRUE)
}

plot(elev, ext=myext, addfun=borders, ylab="Latitude", xlab="Longitude", xlim = c(-175,-50), ylim = c(15,75), col=gray(0:256/256)) 

# read in the north america equal area hex grid map (FAL) and format for use
# other options include a quad map (terr_4h6/nw_vector_grid.shp) or a hexmap with land only (terr_4h6/terr_4h6.shp", sep="")
hexgrid = readShapePoly(paste(main, "/data/icosahedron_land_and_sea/icosahedron.shp", sep="")) #hex with land and sea, cropped to North America

# plot the hexgrid on a map of north america
plot(NA, NA, xlim=c(-175, -50), ylim=c(15, 75), xlab = "Longitude", ylab = "Latitude")
map('worldHires', c("usa", "canada", "mexico"), add=TRUE, fill=T, col="lightblue")
plot(hexgrid, add=T)

# make a North America base map
noam = get_map(location = "North America", zoom=3, maptype = "terrain", color = "bw")

# map total birder effort 2004:2013
eft = PlotChecklistMap(effort, hexgrid, wd)
rm(eft)

# read in eBird data
files = list.files(pattern = "*.txt")

#for each eBird file, print the number sightings per year and per month.
#plot the locations of sightings on a map, color coded by month
for (f in 1:length(files)){
  
  require(ggmap)
  require(ggplot2)
  require(plyr)
  require(reshape2)
  require(Rmisc)
  require(sp)
  require(raster)
  require(segmented)
  source(paste(gitpath, "/migration-fxns.r", sep=""))
  
  humdat = read.table(files[f], header=TRUE, sep=",", quote="", fill=TRUE, as.is=TRUE, comment.char="")
  
  names(humdat) = c("SCI_NAME", "PRIMARY_COM_NAME","YEAR", "DAY", "TIME", "GROUP_ID", "PROTOCOL_ID",
                    "PROJ_ID", "DURATION_HRS", "EFFORT_DISTANCE_KM", "EFFORT_AREA_HA", "NUM_OBSERVERS",
                    "LATITUDE", "LONGITUDE", "SUB_ID", "POLYFID", "MONTH")
  
  humdat$MONTH = factor(humdat$MONTH, levels=c(1:12), ordered=TRUE)
  
  #grab species name for setting directory paths and naming figures
  species = humdat$SCI_NAME[1] 
  species = gsub(" ","", species, fixed=TRUE)
  species = gsub("\"", "", species, fixed=TRUE)
  
  # set years of data to use - data after 2007 is more reliable
  years = c(2004:2013)

  #start a new directory
  dirpath = paste(figpath, "/", species, sep="")
     #dir.create(dirpath, showWarnings = TRUE, recursive = FALSE) #only need if directory did not previously exist
  
  #show how many records there are for the species across the years, write to txt file
  #yeartable = PlotRecords(humdat$YEAR, species)
  #ggsave(file=paste(dirpath, "/", "years", species, ".pdf", sep=""))
  
  write.table(yeartable, file = paste(dirpath, "/", species,".txt",sep=""), row.names=FALSE)
  
  #save a figure of the geographic number of checklists for the species, over all the years
  #count = PlotChecklistMap(humdat, hexgrid, dirpath)
  
  for (y in 1:length(years)){
    yrdat = humdat[which(humdat$YEAR == years[y]),]
    yreffort = effort[which(effort$YEAR == years[y]),]
    
    #plot frequency of sightings per month
    #monthtable = PlotRecords(yrdat$month, species)
    
    #get daily weighted mean location 
    meanlocs = AlternateMeanLocs(yrdat, species, hexgrid, yreffort)
    
    #use GAM model to predict daily location along a smoothing line
    preds = EstimateDailyLocs(meanlocs)
    
    #use gam approach to estimate rough starting points for segmentation from the mean loc latitude data
    startpoints = round(Est3MigrationDates(meanlocs))
    migration = startpoints
    
    #save a plot of the species migration route mapped onto continent with real observations
    pdf(file = paste(dirpath, "/trimmed-route_", species, years[y], ".pdf", sep=""), width = 7, height = 4.5)
    BasePlotMigration(preds, yrdat, migration, elev, USAborder, Mexborder, Canborder, myext)
    dev.off() 
    
    #save a plot of the species migration mapped onto an elevation raster
    pdf(file = paste(dirpath, "/elev-route_", species, years[y], ".pdf", sep=""), width = 7, height = 4.5)
    ElevPlotMigration(preds, yrdat, migration, elev, USAborder, Mexborder, Canborder, myext)
    dev.off() 
    
    #get Great Circle distances traveled each day between predicted daily locations
    dist = DailyTravel(preds, 4, 5, species, years[y], migration)
    
    #estimate migration speed for spring and fall
    speed = MigrationSpeed(dist, migration)
    
    #plot smoothed migration trajectory for the species and year
    #mig_path = PlotMigrationPath(preds, noam, species, years[y])
    #ggsave(mig_path, file=paste(dirpath, "/", "migration", species, years[y], ".pdf", sep=""))
    
#     #plot occurrences with lines showing beginning and end of migration
     PlotOccurrences(meanlocs, species, years[y], migration)
     ggsave(file=paste(dirpath, "/", "occurrences", species, years[y], ".tiff", sep=""), dpi=600)

    #Subset western species by flyway data (check bias in SE US data points) Sensu La Sorte et al in press - 
        #"The role of atmospheric conditions in the seasonal dynamics of North American migration flyways" - JOurnal of Biogeography
    if (species %in% c("Archilochusalexandri", "Selasphorusplatycercus", "Selasphorusrufus", "Selasphoruscalliope")) {
          
      #only use data west of the 103rd meridian (western flyway)
      west_yrdat = yrdat[which(yrdat$LONGITUDE <= -103),]
      
      #get daily weighted mean location 
      west_meanlocs = AlternateMeanLocs(west_yrdat, species, hexgrid, yreffort)
      
      #use GAM model to predict daily location along a smoothing line
      west_preds = EstimateDailyLocs(west_meanlocs)
      
      #use gam approach to estimate rough starting points for segmentation from the mean loc latitude data
      west_migration = round(Est3MigrationDates(west_meanlocs))
      
      #get Great Circle distances traveled each day between predicted daily locations
      west_dist = DailyTravel(west_preds, 4, 5, species, years[y], west_migration)
      
      #estimate migration speed for spring and fall
      west_speed = MigrationSpeed(west_dist, west_migration)
      
      #save a plot of the species migration route mapped onto continent with real observations
      pdf(file = paste(dirpath, "/WEST_trimmed-route_", species, years[y], ".pdf", sep=""), width = 7, height = 4.5)
      BasePlotMigration(west_preds, west_yrdat, west_migration, elev, USAborder, Mexborder, Canborder, myext)
      dev.off() 
      
      #save a plot of the species migration mapped onto an elevation raster
      pdf(file = paste(dirpath, "/WEST_elev-route_", species, years[y], ".pdf", sep=""), width = 7, height = 4.5)
      ElevPlotMigration(west_preds, west_yrdat, west_migration, elev, USAborder, Mexborder, Canborder, myext)
      dev.off() 
    }

    else {
      #create dummy variables for the eastern species. In this case, west == all variables
      west_preds = preds
      west_migration = migration
      west_speed = speed
    }

    #add year to preds, so we can save it to compare across years
    preds$year = years[y]
    west_preds$year = years[y]
    
    if (y == 1){
      pred_data = preds
      migdates = data.frame("spr_begin" = migration[[1]], "spr_end" = migration[[2]], 
                            "fal_begin" = migration[[2]], "fal_end" = migration[[3]],
                            "species" = species, "year" = years[y])
      migspeed = data.frame("spr" = speed[1], "fal" = speed[2], "species" = species, "year" = years[y])
      
      west_pred_data = west_preds
      west_migdates = data.frame("spr_begin" = west_migration[1], "spr_end" = west_migration[2], 
                                 "fal_begin" = west_migration[2], "fal_end" = west_migration[3],
                                 "species" = species, "year" = years[y])
      west_migspeed = data.frame("spr" = west_speed[1], "fal" = west_speed[2], "species" = species, "year" = years[y])
    }
    else{
      pred_data = rbind(pred_data, preds)
      dates = c(migration[[1]], migration[[2]], migration[[2]], migration[[3]], "species" = species, year = years[y])
      speed = c(speed, species, years[y])
      migdates = rbind(migdates, dates)
      migspeed = rbind(migspeed, speed)
      
      west_pred_data = rbind(west_pred_data, west_preds)
      west_dates = c(west_migration[[1]], west_migration[[2]], west_migration[[2]], west_migration[[3]], "species" = species, year = years[y])
      west_speed = c(west_speed, species, years[y])
      west_migdates = rbind(west_migdates, west_dates)
      west_migspeed = rbind(west_migspeed, west_speed)
    }
    
    if (y == length(years)){
      #write migration timing and speed data to file
      write.table(migdates, file = paste(getwd(), "/output_data/migration", species, ".txt", sep=""), 
                  append=FALSE, row.names=FALSE)
      write.table(migspeed, file = paste(getwd(), "/output_data/speed", species, ".txt", sep=""), 
                  append=FALSE, row.names=FALSE)
      write.table(pred_data, file = paste(getwd(), "/output_data/centroids", species, ".txt", sep=""), 
                  append=FALSE,row.names=FALSE)
      
      #write western flyway migration timing and speed data to file
      write.table(west_migdates, file = paste(getwd(), "/output_data/west_migration", species, ".txt", sep=""), 
                  append=FALSE, row.names=FALSE)
      write.table(west_migspeed, file = paste(getwd(), "/output_data/west_speed", species, ".txt", sep=""), 
                  append=FALSE, row.names=FALSE)
      write.table(west_pred_data, file = paste(getwd(), "/output_data/west_centroids", species, ".txt", sep=""), 
                  append=FALSE,row.names=FALSE)
    }
    
    rm(list=ls()[ls() %in% c("sitemap", "meanmap", "yrdat", "meanlocs", "migration", "preds", "dist", "mig_path", "speed",
                             "west_migration", "west_preds", "west_dist", "west_speed")])   # clears the memory of the map and year-level data
  }
  rm(list=ls()[!ls() %in% c("f", "files", "noam", "hexgrid", "effort", "pred_data", "migdates", 
                            "wd", "main", "gitpath","figpath", "USAborder", "Mexborder", "Canborder", "elev", "myext")])   # clears the memory of everything except the file list, iterator, and base map
}


#---------------------------------------------------------------------------------------
#                   compare migration rates and dates across species
#---------------------------------------------------------------------------------------
require(ggmap)
require(ggplot2)
require(plyr)
require(reshape2)
require(Rmisc)
require(sp)
require(raster)
require(gamm4)
require(chron)
require(fields)
require(RColorBrewer)
source(paste(gitpath, "/migration-fxns.r", sep=""))

setwd(wd)

# read in eBird data for migration speed, dates, and predicted path (should be in same order for species)
rfiles = list.files(path = paste(getwd(), "/output_data/", sep=""), pattern = "west_speed", full.names=TRUE)
mfiles = list.files(path = paste(getwd(), "/output_data/", sep=""), pattern = c("west_migration.*txt"), full.names=TRUE)
cfiles = list.files(path = paste(getwd(), "/output_data/", sep=""), pattern = "west_centroids.*.txt", full.names=TRUE)

# species ordered by data files, body sizes from Dunning 2008, migration distance from Nature Serve centroids
species = c("Black-chinned", "Ruby-throated", "Calliope", "Broad-tailed", "Rufous")
mass = c(3.4, 3.1, 2.65, 3.55, 3.5)
distance = c(1721.49, 2765.86, 3252.82, 1737.89, 4102.58)
spdata = data.frame(species, mass, distance, "lat_r2"= NA, "lon_r2" = NA, "spr_speed" = NA,
                    "fal_speed" = NA, "spr_date" = NA, "fal_date" = NA)


#------------------------------------------ ANALYZE THE DATA -------------------------------------

#-------------------- 
#       model to test variance in lat and lon across years, with year as a random effect
#-------------------- get gamm4 results using all years (2004-2013) and later years (2008-2013)
for (f in 1:length(cfiles)){
  preds = read.table(cfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  dates = read.table(mfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  preds_sub = preds[which(preds$year > 2007),]
  print(preds[1,1]) #species name
  years = c(2004:2013)
  
  #grab only the predicted daily centroids from between the migration dates
  for (y in 1:length(years)){
    between = preds[which(preds$year == years[y] & preds$jday >= dates[y,1] & preds$jday <= dates[y,4]),]
    spring = preds[which(preds$year == years[y] & preds$jday >= dates[y,1] & preds$jday < dates[y,2]),]
    fall = preds[which(preds$year == years[y] & preds$jday > dates[y,2] & preds$jday <= dates[y,4]),]
    
    if (y == 1){
      migpreds = between
      pred_spr = spring
      pred_fal = fall
    }
    else{
      migpreds = rbind(migpreds, between)
      pred_spr = rbind(pred_spr, spring)
      pred_fal = rbind(pred_fal, fall)
    }
  }
  
  # subset the seasonal data by the recent years (2008-2013)
  pred_spr_sub = pred_spr[which(pred_spr$year > 2007),]
  pred_fal_sub = pred_fal[which(pred_fal$year > 2007),]
  
  #gam analysis to determine variance across years, using all of the daily centroids (Jday 1-365) in each year
  lon_gam = gamm4(lon ~ s(jday, k=10), random = ~(1|year), data = preds_sub)
  print (paste("R2 for Longitude is:", round(summary(lon_gam$gam)$r.sq,4)))
  lat_gam = gamm4(lat ~ s(jday, k=10), random = ~(1|year), data=preds_sub)
  print (paste("R2 for Latitude is:", round(summary(lat_gam$gam)$r.sq,4)))
  
  #gam analysis to determine variance across years, using only spring centroids (start of spring to peak latitude date)
  lon_gam_spr = gamm4(lon ~ s(jday, k=10), random = ~(1|year), data=pred_spr_sub)
  print (paste("R2 for spring Longitude is:", round(summary(lon_gam_spr$gam)$r.sq,4)))
  lat_gam_spr = gamm4(lat ~ s(jday, k=10), random = ~(1|year), data=pred_spr_sub)
  print (paste("R2 for spring Latitude is:", round(summary(lat_gam_spr$gam)$r.sq,4)))
  
  #gam analsysis to determine variance across years, using only fall centroids (peak latitude to end of fall date)
  lon_gam_fal = gamm4(lon ~ s(jday, k=10), random = ~(1|year), data=pred_fal_sub)
  print (paste("R2 for fall Longitude is:", round(summary(lon_gam_fal$gam)$r.sq,4)))
  lat_gam_fal = gamm4(lat ~ s(jday, k=10), random = ~(1|year), data=pred_fal_sub)
  print (paste("R2 for fall Latitude is:", round(summary(lat_gam_fal$gam)$r.sq,4)))
  
  spdata[f,4] = summary(lat_gam$gam)$r.sq
  spdata[f,5] = summary(lon_gam$gam)$r.sq
}


#--------------------
#       get linear model results using years (2008-2013) and plot as barplot
#-------------------- 
lm_mig = data.frame("species"="Archilochusalexandri", "season" = "spring", "year" = 1, "lat_slope" = 1, "lat_r2" = 1, "lon_slope" = 1, "lon_r2" = 1)

for (f in 1:length(cfiles)){
  preds = read.table(cfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  dates = read.table(mfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  species = preds[1,1] #species name
  years = c(2008:2013)
  
  #grab only the predicted daily centroids from between the migration dates
  for (y in 1:length(years)){
    between = preds[which(preds$year == years[y] & preds$jday >= dates[y,1] & preds$jday <= dates[y,4]),]
    spring = preds[which(preds$year == years[y] & preds$jday >= dates[y,1] & preds$jday < dates[y,2]),]
    fall = preds[which(preds$year == years[y] & preds$jday > dates[y,2] & preds$jday <= dates[y,4]),]
    
    #get slope & r2 of spring and fall latitudinal migration
    spr_lm = LinearMigration(spring, years[y])
    spr_lm = data.frame("species" = species, "season"= "spring", spr_lm)
    fal_lm = LinearMigration(fall, years[y])
    fal_lm = data.frame("species" = species, "season"= "fall", fal_lm)
    lm_mig = rbind(lm_mig, spr_lm)
    lm_mig = rbind(lm_mig, fal_lm)
  }
}
lm_mig = lm_mig[-1,] #delete first row of dummy data

#plot the variance in estimated migration begin and end for all and for recent years
pdf(file = paste(figpath, "/linearslope_all_species.pdf", sep=""), width = 6, height = 5)

bxp_rate = ggplot(lm_mig, aes(season, abs(lat_slope), fill=season)) + geom_boxplot() + theme_classic() + 
  scale_fill_manual(values=c("cadetblue", "orange"), guide = "none") + 
  ylab("linear slope of seasonal migration") + theme(text = element_text(size=12)) + 
  scale_y_continuous(breaks = seq(0, 0.40, by = 0.10), limits = c(0,0.40)) + theme(text = element_text(size=12)) +
  facet_wrap(~species)

multiplot(bxp_rate, cols = 1)
dev.off()


#--------------------------- 
#       generate figures and table data for migration speed
#--------------------------- boxplots of migration speed for spring vs. fall for each species
rate = data.frame("spr" =1, "fal" = 1, "species" = "none", "year" = 1)
for (f in 1:length(rfiles)){
  sp_rate = read.table(rfiles[f], header=TRUE, sep=" ", fill=TRUE, comment.char="")
#  sp_rate = subset(sp_rate, year>2007) #use only years 2008-2013
  rate = rbind(rate, sp_rate)
  
  #print the mean and standard deviation of speed for each species (note: this includes all years, unless uncomment above line)
  print(sp_rate$species[1])
  print(paste("spring sd:", sd(sp_rate$spr)))
  print(paste("spring mean:", mean(sp_rate$spr)))
  print(paste("fall sd:", sd(sp_rate$fal)))
  print(paste("fall mean:", mean(sp_rate$fal)))
  print("")
  
  spdata[f,6] = mean(sp_rate$spr)
  spdata[f,7] = mean(sp_rate$fal)
}

rate = subset(rate, year > 2007) #subset to better-sampled years
r = melt(rate[,c(1,2,3,4)], id.vars = c("year", "species"))
names(r) = c("year", "species", "season", "rate")

#plot the variance in estimated migration speed for all and for recent years
pdf(file = paste(figpath, "/speed_all_species.pdf", sep=""), width = 15, height = 3)
  
bxp_speed = ggplot(r, aes(season, rate, fill=season)) + geom_boxplot() + theme_bw() + 
    scale_fill_manual(values=c("cadetblue", "orange"), guide="none") + ylab("km/day") + 
    scale_y_continuous(breaks = seq(0, 80, by = 20), limits = c(0,80)) + 
    facet_wrap(~species, nrow=1)
  
multiplot(bxp_speed, cols = 5)
dev.off() 


#--------------------------
#         generate figures and table data for migration dates
# ------------------------ Boxplots of the number of days +/- mean migration date, by species
dates = data.frame("spr_begin" = 1, "mid" = 1, "fal_end" = 1, "species" = "none", "year" = 1)

for (f in 1:length(mfiles)){
  sp_dates = read.table(mfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
# sp_dates = subset(sp_dates, year>2007)
  
  #Print species table data (note: this includes all years, unless uncomment above line)
  print(sp_dates$species[1])
  print(paste("spring begin median:", median(sp_dates$spr_begin)))
  print(paste("spring begin sd:", sd(sp_dates$spr_begin)))
  print(paste("lat peak median:", median(sp_dates$spr_end)))
  print(paste("lat peak sd:", sd(sp_dates$spr_end)))
  print(paste("fall end median:", median(sp_dates$fal_end)))
  print(paste("fall end sd:", sd(sp_dates$fal_end)))
  print("")
  
  #standardize dates, to get number of days +/- mean
  sp_dates = subset(sp_dates, year > 2007)
  
  spdata[f,8] = round(sd(sp_dates$spr_begin),4)
  spdata[f,9] = round(sd(sp_dates$fal_end), 4)
  
  sp_dates$spr_begin = sp_dates$spr_begin - mean(sp_dates$spr_begin)
  sp_dates$mid = sp_dates$spr_end - mean(sp_dates$spr_end)
  sp_dates$fal_end = sp_dates$fal_end - mean(sp_dates$fal_end)
  dates = rbind(dates, sp_dates[,c(1,7,4,5,6)])

}

dates = dates[-1,]
d = melt(dates, id.vars = c("species", "year"))
names(d) = c("species", "year", "season", "date")
  
#plot the variance in estimated migration begin and end for all and for recent years
pdf(file = paste(figpath, "/migdates_all_species.pdf", sep=""), width = 15, height = 3)
  
bxp_date = ggplot(d, aes(season, date, fill=season)) + geom_boxplot() + theme_bw() + 
  scale_fill_manual(values=c("cadetblue", "olivedrab3", "orange"), guide = "none") + 
  ylab("number of days -/+ mean date") + theme(text = element_text(size=12)) + 
  scale_y_continuous(breaks = seq(-40, 40, by = 20), limits = c(-40,40)) +
  facet_wrap(~species, nrow=1)
  
  multiplot(bxp_date)
  dev.off()


#------------------------------------------ PLOT THE DATA -------------------------------------


#------------------------------------
#       plot the data for species comparisons
#------------------------------------

ggplot(spdata, aes(distance, lat_r2)) + geom_point(size = mass) + xlab("total migration distance") +
  ylab("Latitude R2 by date") + stat_smooth(method = "lm") + theme_classic()

ggplot(spdata, aes(distance, lon_r2)) + geom_point(size = mass) + xlab("total migration distance") +
  ylab("Longitude R2 by date") + stat_smooth(method = "lm") + theme_classic()

ggplot(spdata, aes(distance, spr_speed)) + geom_point(size = mass) + xlab("total migration distance") +
  ylab("Population spring migration speed (km/day)") + 
  stat_smooth(method = "lm", col = "cadetblue", fill = "cadetblue", alpha = 0.2) + 
  theme_classic() #+ geom_text(label=species)

ggplot(spdata, aes(distance, fal_speed)) + geom_point(size = mass) + xlab("total migration distance") +
  ylab("Population fall migration speed (km/day)") + stat_smooth(method = "lm", col = "orange", fill = "orange", alpha = 0.2) +
  theme_classic() #+ geom_text(label=species)

ggplot(spdata, aes(distance, spr_date)) + geom_point(size = mass) + xlab("total migration distance") +
  ylab("sd in spring onset") + stat_smooth(method = "lm", col = "cadetblue", fill = "cadetblue", alpha = 0.2) +
  theme_classic()

ggplot(spdata, aes(distance, fal_date)) + geom_point(size = mass) + xlab("total migration distance") +
  ylab("sd in fall arrival") + stat_smooth(method = "lm", col = "orange", fill = "orange", alpha = 0.2) +
  theme_classic()


#---------------------------------
#       plot standard deviation in the daily centroid estimates across years 2008-2013
#---------------------------------
for (f in 1:length(cfiles)){
  preds = read.table(cfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  dates = read.table(mfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  species = preds[1,1] #species name
  years = c(2008:2013)
  
  #grab only the predicted daily centroids from between the migration dates
  for (y in 1:length(years)){
    between = preds[which(preds$year == years[y] & preds$jday >= dates[y,1] & preds$jday <= dates[y,4]),]

    if (y == 1){
      migpreds = between
    }
    else{
      migpreds = rbind(migpreds, between)
    }
  }
  
  #compare location across the years using mean and sd for 2008:2013
  jdays = sort(unique(migpreds$jday))
  patherr = data.frame("jday"=1,"meanlat"=1, "sdlat"=1, "meanlon"=1, "sdlon"=1)
  outcount = 1
  for (j in min(jdays):max(jdays)){
    tmp = migpreds[which(migpreds$jday == j),]
    meanlat = mean(tmp$lat)
    sdlat = sd(tmp$lat)
    meanlon = mean(tmp$lon)
    sdlon = sd(tmp$lon)
    patherr[outcount,] = c(j, meanlat, sdlat, meanlon, sdlon)
    outcount = outcount + 1
  }
    
  # plot the standard deviation in daily lat and lon across the 10 years, and across 6 most recent years
  pdf(file = paste(dirpath, "/ErrorinDailyLocs", species, ".pdf", sep=""), width = 5, height = 8)
  
  ymax = max(c(patherr$sdlat, patherr$sdlon),na.rm=TRUE) 
  sdlocs = ggplot(patherr, aes(jday, sdlat)) + geom_point(size=1) + theme_classic() + 
    ggtitle(paste(species, "sd in daily locs", min(years), "-", max(years))) +
    scale_y_continuous(breaks = seq(0, ymax, by = 0.5), limits = c(0, ymax)) + 
    scale_x_continuous(breaks = seq(0, 366, by = 25), limits = c(0, 366)) + 
    geom_point(aes(jday, sdlon), col = "indianred", size=1) + ylab("stdev daily lat (black) and lon (red)")

  lonlatday = ggplot(patherr, aes(sdlon, sdlat, col=jday)) + geom_point(size=1) + theme_classic() +
    scale_y_continuous(breaks = seq(0, ymax, by = 0.5), limits = c(0, ymax))  +
    scale_x_continuous(breaks = seq(0, ymax, by = 0.5), limits = c(0, ymax)) 
 
  multiplot(sdlocs, lonlatday, cols = 1)
  dev.off()
}
  
  
#---------------------------------
#       plot all migration routes for a species on a map with elevation raster
#---------------------------------
pdf(file = paste(figpath, "/elev-route_summary_all_species.pdf", sep=""), width = 7, height = 4.5)

for (f in 1:length(cfiles)){
  preds = read.table(cfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  preds = subset(preds, year > 2007)
  elev = raster("alt_5m_bil/alt.bil")  #elevation layers
  myext = c(-175, -50, 15, 75) #set extent to North America
  species = preds[1,1] #species name
  
  AllMigration(preds, elev, myext, species)
}
dev.off() 


#---------------------------------
#       plot standard error in predicted centroids across years
#---------------------------------
pdf(file=paste(figpath, "/se_corlon-lat_all_species.pdf", sep=""), width = 10, height = 7)

for (f in 1:length(cfiles)){
  preds = read.table(cfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  dates = read.table(mfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  species = preds[1,1] #species name
  years = c(2004:2013)

  #grab only the predicted daily centroids from between the migration dates
  for (y in 1:length(years)){
    between = preds[which(preds$year == years[y] & preds$jday >= dates[y,1] & preds$jday <= dates[y,4]),]
  
    if (y == 1){ migpreds = between }
    else{ migpreds = rbind(migpreds, between) }
  }
  
  if (species == "Selasphoruscalliope") {by = 1}
  else {by = 0.5}
  
  ymax = max(c(migpreds$lat_se, migpreds$lon_se))
  latlon = ggplot(migpreds, aes(lon_se, lat_se)) + ggtitle(species) +
    xlab("estimated longitude standard error") + ylab("estimated latitude standard error") +
  geom_point(alpha = 0.5) + theme_classic() + facet_wrap(~year) +
  theme(text = element_text(size=12)) +
  scale_y_continuous(breaks = seq(0, ymax, by = by), limits = c(0, ymax)) +
  scale_x_continuous(breaks = seq(0, ymax, by = by), limits = c(0, ymax))
  multiplot(latlon, cols = 1)
}
dev.off()


#---------------------------------
#       plot standard error in predicted centroids across years, by julian date
#---------------------------------
pdf(file = paste(figpath, "/error_se-latlon_all_species.pdf", sep=""), width = 10, height = 4)

for (f in 1:length(cfiles)){
  preds = read.table(cfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  dates = read.table(mfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  dates = subset(dates, year > 2007)
  species = preds[1,1] #species name
  years = c(2008:2013)
  
  #grab only the predicted daily centroids from between the migration dates
  for (y in 1:length(years)){
    between = preds[which(preds$year == years[y] & preds$jday >= dates[y,1] & preds$jday <= dates[y,4]),]
    
    if (y == 1){ migpreds = between }
    else{ migpreds = rbind(migpreds, between) }
  }

  ymax = max(c(migpreds$lat_se, migpreds$lon_se)) 
  lat = ggplot(migpreds, aes(jday, lat_se, col=as.factor(year))) + geom_point(size=1) + theme_classic() +
  geom_vline(xintercept = c(dates$spr_begin), col = "cadetblue") +
  geom_vline(xintercept = c(dates$fal_end), col = "orange") + ggtitle(species) +
  xlab("Julian Day") + ylab("latitude centroid standard error") +
  scale_y_continuous(breaks = seq(0, ymax, by = 0.25), limits = c(0, ymax))+
  theme(text = element_text(size=12))

  lon = ggplot(migpreds, aes(jday, lon_se, col=as.factor(year))) + geom_point(size=1) + theme_classic() +
  geom_vline(xintercept = c(dates$spr_begin), col = "cadetblue") +
  geom_vline(xintercept = c(dates$fal_end), col = "orange") + ggtitle(species) +
  xlab("Julian Day") + ylab("longitude centroid standard error") +
  scale_y_continuous(breaks = seq(0, ymax, by = 0.25), limits = c(0, ymax)) +
  theme(text = element_text(size=12))

  multiplot(lat, lon, cols = 2)
}
dev.off()


#---------------------------------
#       plot predicted latitude and longitude as distribution across the years for all species
#---------------------------------
# save plots comparing daily lat and long and migration date across the years
pdf(file = paste(figpath, "/AllYears_lon-lat_all_species.pdf", sep=""), width = 8, height = 5)

for (f in 1:length(cfiles)){
  preds = read.table(cfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  dates = read.table(mfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  preds = subset(preds, year > 2007)
  dates = subset(dates, year > 2007)
  species = preds[1,1] #species name
  
  yrlylat = ggplot(preds, aes(jday, lat, col=year)) + geom_point(size=1) + theme_classic() +
    geom_vline(xintercept = c(dates$spr_begin), col = "cadetblue") +
    geom_vline(xintercept = c(dates$spr_end), col = "olivedrab3") +
    geom_vline(xintercept = c(dates$fal_end), col = "orange") +
    scale_x_continuous(breaks = seq(0, 365, by = 30)) + 
    theme(text = element_text(size=20)) + ggtitle(species)
  
  yrlylon = ggplot(preds, aes(jday, lon, col=year)) + geom_point(size=1) + theme_classic() +
    geom_vline(xintercept = c(dates$spr_begin), col = "cadetblue") +
    geom_vline(xintercept = c(dates$spr_end), col = "olivedrab3") +
    geom_vline(xintercept = c(dates$fal_end), col = "orange") +
    scale_x_continuous(breaks = seq(0, 365, by = 30)) + 
    theme(text = element_text(size=20))

  multiplot(yrlylat, cols = 1)
  multiplot(yrlylon, cols = 1)
}
dev.off()

#----------------------------------------------------------
#       Makes Figure B3. Observations across the years (2004-2013)
#       For Appendix B
#----------------------------------------------------------

#NOTE!!: For this to work, make sure files and mfiles is in the same species-order!!
for (f in 1:length(files)) {
  
  #read in raw data
  dates = read.table(mfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  dates = subset(dates, year > 2007)
  
  humdat = read.table(files[f], header=TRUE, sep=",", quote="", fill=TRUE, as.is=TRUE, comment.char="")
  
  names(humdat) = c("SCI_NAME", "PRIMARY_COM_NAME","YEAR", "DAY", "TIME", "GROUP_ID", "PROTOCOL_ID",
                    "PROJ_ID", "DURATION_HRS", "EFFORT_DISTANCE_KM", "EFFORT_AREA_HA", "NUM_OBSERVERS",
                    "LATITUDE", "LONGITUDE", "SUB_ID", "POLYFID", "MONTH")
  
  humdat$MONTH = factor(humdat$MONTH, levels=c(1:12), ordered=TRUE)
  
  #grab species name for later identification
  species = humdat$SCI_NAME[1]
  species = gsub("\"", "", species, fixed=TRUE)
  years = c(2004:2013)
  
  for (y in 1:length(years)){
    dat = subset(humdat, YEAR==years[y])
    yrobs = data.frame(table(dat$DAY))
    yrobs$year = years[y]
    
    if(y == 1) { obs = yrobs}
    else{ obs = rbind(obs, yrobs)}
  }
    names(obs) = c("jday", "freq")
    obs$jday = as.numeric(obs$jday)
    obs$freq = as.numeric(obs$freq)
  ymax = max(obs$freq) + 25
  
  ggplot(obs, aes(jday, freq)) + geom_point(alpha=0.4) + xlab("Julian day of Year") + ylab("Number of Checklists") + 
    theme_classic() + theme(text = element_text(size=12)) +     
    geom_smooth(se=T, method='gam', formula=y~s(x, k=40), gamma=1.5) +
    scale_x_continuous(breaks = seq(0, 365, by = 50), limits = c(0,365)) + 
    scale_y_continuous(breaks = seq(0, ymax, by = 50)) +
    geom_vline(xintercept = dates[,1], col = "cadetblue", size = 0.5) +
    geom_vline(xintercept = dates[,4], col = "orange", size = 0.5) +
    geom_vline(xintercept = dates[,2], col = "olivedrab3", size = 0.5) 

  ggsave(file=paste(figpath, "/B3_occurrences", species, years[y], ".tiff", sep=""), dpi=600, height=3, width=4)
}


#-----------------------------------------------------------
#       Make synthetic panel figure for manuscript  - FIGURE 1
#       Make parts separately, and bring back together 
#         in Illustrator or Inkscape
#-----------------------------------------------------------
#reorder files to match manuscript ordering
files = c(files[4],files[1],files[2],files[3],files[5])
cfiles = c(cfiles[2],cfiles[1],cfiles[4],cfiles[3],cfiles[5])
mfiles = c(mfiles[2],mfiles[1],mfiles[4],mfiles[3],mfiles[5])

for (f in 1:length(files)) {
  
  #read in raw data
  humdat = read.table(files[f], header=TRUE, sep=",", quote="", fill=TRUE, as.is=TRUE, comment.char="")
  
  names(humdat) = c("SCI_NAME", "PRIMARY_COM_NAME","YEAR", "DAY", "TIME", "GROUP_ID", "PROTOCOL_ID",
                    "PROJ_ID", "DURATION_HRS", "EFFORT_DISTANCE_KM", "EFFORT_AREA_HA", "NUM_OBSERVERS",
                    "LATITUDE", "LONGITUDE", "SUB_ID", "POLYFID", "MONTH")
  
  humdat$MONTH = factor(humdat$MONTH, levels=c(1:12), ordered=TRUE)
  
  #grab species name for later identification
  species = humdat$SCI_NAME[1]
  species = gsub("\"", "", species, fixed=TRUE)
   
  #plot the geographic number of checklists for the species, over all the years  
  #find the number of obs in each cell
  t = table(as.factor(humdat$POLYFID))
  
  #Merge the count data for the hexes with all the hexes in the map
  df = data.frame(POLYFID = names(t), count=as.numeric(t))
  df2 = data.frame(POLYFID = unique(hexgrid$POLYFID))
  df3 = merge(df2, df, all.x=TRUE)
  df3$species = species
  
  #combine all the datasets so we can get the colors on the same scale
  if (f == 1) {
    df3.1 = df3
    splist = species
  }
  
  else{
    df3.1 = rbind(df3.1, df3)
    splist = c(splist, species)
  }
}

# Set colors and legend scale by all species (e.g. some will be red, some will be shades of yellow)
  #matches colors with the number of observations
  yltord = colorRampPalette(brewer.pal(9, "YlOrRd"))(length(unique(df3.1$count)))
  cols = data.frame(id=c(NA,sort(unique(df3.1$count))), cols=yltord, stringsAsFactors=FALSE)
  df4 = merge(df3.1, cols, by.x="count", by.y="id")
  #set scale for legend
  vls = sort(unique(round(cols$id/500)*500))
  vls[1] = 1
  cols2 = rev(heat.colors(length(vls)))

#separate back out by species for plotting
for (s in 1:length(splist)){
  df4.1 = subset(df4, species == splist[s])
  df5 = merge(hexgrid, df4.1, by.x="POLYFID", by.y="POLYFID", all.x=TRUE)
  df5$cols = ifelse(is.na(df5$count), "white", df5$cols)  #hexes with no counts are white
  df5 = df5[order(df5$POLYFID),]

  #make a map with hexes colored by the number of times the species was observed in a given hex
  setEPS()
  postscript(file = paste(figpath, "/fig1_col1_", splist[s], ".eps", sep=""), width = 3, height = 3)
  plot(NA, NA, xlim = c(-140,-60), ylim=c(15,55),xlab="", ylab="", axes=FALSE)
    plot(hexgrid, col=df5$cols, border = "white", lwd = 0.25, las=1, add=TRUE)
    mtext(side=2,line=2, splist[s])
    map("worldHires", c("usa", "canada", "mexico"), add=TRUE, cex = 0.25)
  dev.off()
  
  #plot the legend separately
  setEPS()
  postscript(file = paste(figpath, "/fig1_col1_legend.eps", sep=""), width = 3, height = 3)
  plot(NA, NA, xlim = c(0,5), ylim=c(0,5), axes=FALSE, xlab = "", ylab="")
  legend.col(col = cols[,2], lev = sort(unique(df4$count)))
  dev.off()
}

for (f in 1:length(files)) {
  #read in the centroid and migration data (by western flyway), use data for years 2008-2013
  preds = read.table(cfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  dates = read.table(mfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  preds_sub = preds[which(preds$year > 2007),]
  dates_sub = dates[which(dates$year > 2007),]
  species = preds[1,1]
  years = c(2008:2013)
  
# plot the migration routes in the next figure
preds_sub$month = as.factor(preds_sub$month)
cols3 = data.frame(id=c(sort(unique(preds_sub$month))), cols=tim.colors(length(unique(preds_sub$month))), stringsAsFactors=FALSE)
preds_sub = merge(preds_sub, cols3, by.x="month", by.y="id")
#set color scale
vls = sort(unique(round(cols3$id)))
vls[1] = 1
cols4 = tim.colors(length(vls))

setEPS()
postscript(file = paste(figpath, "/fig1_col2_", species, ".eps", sep=""), width = 3, height = 3)
plot(preds_sub$lon, preds_sub$lat, col=preds_sub$cols, pch=19, cex=0.25, ylab="", xlab="", xlim = c(-140, -60), ylim = c(15, 55),
     cex.lab = 1, cex.axis=1, axes=FALSE)
  map("worldHires", c("usa", "canada", "mexico"), add=TRUE)
  points(preds_sub$lon, preds_sub$lat, col=preds_sub$cols, pch=19, cex=0.25)
dev.off()

#plot legend separately  
setEPS()
postscript(file = paste(figpath, "/fig1_col2_legend.eps", sep=""), width = 3, height = 3)
plot(NA, NA, xlim = c(0,5), ylim=c(0,5), axes=FALSE, xlab = "", ylab="")
  legend("bottomleft", legend=vls, pch=22, pt.bg=cols4, pt.cex=0.75, cex=0.75, bty="n",
       col="black", title="month", x.intersp=1, y.intersp=0.5)  
dev.off()

# plot the latitudinal patterns with estimated dates of migration
setEPS()
postscript(file = paste(figpath, "/fig1_col4_", species, ".eps", sep=""), width = 3, height = 3)
latmin = ((min(preds_sub$lat)%/%5+1)*5)-5
latmax = ((max(preds_sub$lat)%/%5+1)*5) + 5 

par(bty="l")
plot(preds_sub$jday, preds_sub$lat, pch = 19, xlab = "", ylab = "", 
     cex.axis = 1, col = "grey20", cex = 0.25)
  abline(v=dates_sub$spr_begin, col="cadetblue")
  abline(v=dates_sub$spr_end, col = "olivedrab3")
  abline(v=dates_sub$fal_end, col = "orange")
dev.off()

}


#---------------------------------
#       plot predicted latitude with 95% confidence intervals
#---------------------------------
# save plots comparing daily lat and long and migration date across the years

for (f in 1:length(cfiles)){
  preds = read.table(cfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  dates = read.table(mfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  preds = subset(preds, year > 2007)
  dates = subset(dates, year > 2007)
  species = preds[1,1] #species name
  
  #calculate 95% CI
  preds$lat_ucl = preds$lat + 1.96 * preds$lat_se
  preds$lat_lcl = preds$lat - 1.96 * preds$lat_se
  preds$lon_ucl = preds$lon + 1.96 * preds$lon_se
  preds$lon_lcl = preds$lon - 1.96 * preds$lon_se
  
  years = c(2008:2013)
  #grab only the predicted daily centroids from between the migration dates
  for (y in 1:length(years)){
    between = preds[which(preds$year == years[y] & preds$jday >= dates[y,1] & preds$jday <= dates[y,4]),]
    spring = preds[which(preds$year == years[y] & preds$jday >= dates[y,1] & preds$jday < dates[y,2]),]
    fall = preds[which(preds$year == years[y] & preds$jday > dates[y,2] & preds$jday <= dates[y,4]),]
    
    if (y == 1){
      migpreds = between
      pred_spr = spring
      pred_fal = fall
    }
    else{
      migpreds = rbind(migpreds, between)
      pred_spr = rbind(pred_spr, spring)
      pred_fal = rbind(pred_fal, fall)
    }
  }
     
  # plot spring and fall separately, color-coded to compare overlap
  seasons = ggplot(pred_spr, aes(lon, lat), col = year, group=year, order.by=jday) + theme_classic() + 
    geom_rect(data=pred_spr, aes(xmin=lon_lcl,ymin=lat_lcl,xmax=lon_ucl,ymax=lat_ucl, group=year), fill="cadetblue", alpha=0.02) +
    geom_path(aes(group=year)) + 
    geom_rect(data=pred_fal, aes(xmin=lon_lcl,ymin=lat_lcl,xmax=lon_ucl,ymax=lat_ucl, group=year), fill="orange", alpha=0.02) +
    geom_path(data=pred_fal, aes(lon, lat, group=year)) + 
    theme(text = element_text(size=12)) + xlab("longitude") + ylab("latitude")
  
  pdf(file = paste(figpath, "/fig1_col3_", species, ".pdf", sep=""), width = 3, height = 3)
  multiplot(seasons, cols = 1)
  dev.off()
}


#---------------------------------
#       Figure B1 for appendix, total eBird effort 2004-2013
#---------------------------------

#find the number of obs in each cell
t = table(as.factor(effort$POLYFID))

#Merge the count data for the hexes with all the hexes in the map
df = data.frame(POLYFID = names(t), count=as.numeric(t))
df2 = data.frame(POLYFID = unique(hexgrid$POLYFID))
df3 = merge(df2, df, all.x=TRUE)

#matches colors with the number of observations
#   # Set colors and legend scale by all species (e.g. some will be red, some will be shades of yellow)
yltord = colorRampPalette(brewer.pal(9, "YlOrRd"))(length(unique(df3$count)))
cols = data.frame(id=c(NA,sort(unique(df3$count))), cols=yltord, stringsAsFactors=FALSE)
df4 = merge(df3, cols, by.x="count", by.y="id")
df5 = merge(hexgrid, df4, by.x="POLYFID", by.y="POLYFID", all.x=TRUE)

#   #set scale for legend
vls = sort(unique(round(cols$id/500)*500))
vls[1] = 1
cols2 = rev(heat.colors(length(vls)))

#hexes with no counts are white
df5$cols = ifelse(is.na(df5$count), "white", df5$cols)
df5 = df5[order(df5$POLYFID),]

#make a map with hexes colored by the number of times the species was observed in a given hex
setEPS()
postscript(file = paste(figpath, "/FigureB1.eps", sep=""), width = 5.5, height = 4)
plot(NA, NA, xlim = c(-170,-50), ylim=c(15,75),xlab="", ylab="", axes=FALSE)
plot(hexgrid, col=df5$cols, border = "white", lwd = 0.25, las=1, add=TRUE)
map("worldHires", c("usa", "canada", "mexico"), add=TRUE, cex = 0.25)
dev.off()

#plot the legend separately
setEPS()
postscript(file = paste(figpath, "/figB1_col1_legend.eps", sep=""), width = 3, height = 3)
plot(NA, NA, xlim = c(0,5), ylim=c(0,5), axes=FALSE, xlab = "", ylab="")
legend.col(col = cols[,2], lev = sort(unique(df4$count)))
dev.off()


#--------------------------------------------------
#   wrt question about number of obs in eastern US
#--------------------------------------------------
for (f in 1:length(files)){
  humdat = read.table(files[f], header=TRUE, sep=",", quote="", fill=TRUE, as.is=TRUE, comment.char="")

  names(humdat) = c("SCI_NAME", "PRIMARY_COM_NAME","YEAR", "DAY", "TIME", "GROUP_ID", "PROTOCOL_ID",
                  "PROJ_ID", "DURATION_HRS", "EFFORT_DISTANCE_KM", "EFFORT_AREA_HA", "NUM_OBSERVERS",
                  "LATITUDE", "LONGITUDE", "SUB_ID", "POLYFID", "MONTH")

  humdat$MONTH = factor(humdat$MONTH, levels=c(1:12), ordered=TRUE)

  #grab species name for later identification
  species = humdat$SCI_NAME[1]

  #subset eastern observations
  east = subset(humdat, LONGITUDE > -103)
  eastwinter = subset(east, MONTH %in% c(11, 12, 1, 2, 3))
  plot(table(east$YEAR), main=species)
  plot(table(eastwinter$YEAR), main=species)
}
  

#----------------------------------------------
#       Reviewer comment: detecting trends
#----------------------------------------------

#aggregate all the dates files
for (f in 1:length(mfiles)){
  dates = read.table(mfiles[f], header=TRUE, sep=" ", as.is=TRUE, fill=TRUE, comment.char="")
  if (f == 1) { d = dates }
  else { d = rbind(d, dates) }
}
d = d[,-2]
d = subset(d, year > 2007)
names(d) = c("spring_begin", "peak_latitude", "autumn_end", "species", "year")

#plot the data
spring = ggplot(d, aes(year, spring_begin, group=species)) + geom_line(aes(linetype=species)) + 
  theme_bw() + ylab("begin spring migration")
breed = ggplot(d, aes(year, peak_latitude, group=species)) + geom_line(aes(linetype=species)) + 
  theme_bw() + ylab("reach peak latitude")
autumn = ggplot(d, aes(year, autumn_end, group=species)) + geom_line(aes(linetype=species)) + 
   theme_bw() + ylab("end autumn migration")

pdf(file = paste(figpath, "/migration_trends.pdf", sep=""), width = 6, height = 7)
  multiplot(spring, breed, autumn, cols=1)
dev.off()


library(plyr)
# Break up d by species, then fit the specified model to each piece and return a list
# SPRING
models <- dlply(d, "species", function(df) 
  lm(spring_begin ~ year, data = df))
# Apply coef to each model and return a data frame
ldply(models, coef)
# Print the summary of each model
l_ply(models, anova, .print = TRUE)
l_ply(models, summary, .print = TRUE)

# PEAK LAT
models <- dlply(d, "species", function(df) 
  lm(peak_latitude ~ year, data = df))
# Apply coef to each model and return a data frame
ldply(models, coef)
# Print the summary of each model
l_ply(models, anova, .print = TRUE)

# AUTUMN
models <- dlply(d, "species", function(df) 
  lm(autumn_end ~ year, data = df))
# Apply coef to each model and return a data frame
ldply(models, coef)
# Print the summary of each model
l_ply(models, anova, .print = TRUE)