## Eric M. Lind
## Script to generate a tree topology from a recursive parent-child table,
## i.e. tc = 'Taxon-Container'.
##
## Input is a taxon-container table (e.g. read from a csv file) with two
## columns, taxon & container:
##   - 'taxon' column holds the unique id of tips and nodes
##   - 'container' column holds the parent id of the taxon
## All containers should have >1 child; containers with a single child are
## reported with a warning and collapsed.
## All taxa EXCEPT the root have a container; the taxon whose container is
## not itself listed as a taxon (e.g. NA or "") is taken as the root.
## Returns an object of class 'phylo' for use in packages ape, picante, etc.

library(ape)

## tc2ape: convert a taxon-container table into an ape 'phylo' topology.
##
## Arguments:
##   tc  two-column matrix or data frame; column 1 is the taxon id,
##       column 2 is the id of its container (parent).
## Value:
##   The result of collapse.singles() applied to a 'phylo' object built
##   from 'edge', 'Nnode' and 'tip.label' (no branch lengths are set).
## Errors:
##   - "more than one root ..." when several rows have a container that is
##     not listed as a taxon.
##   - "no root found ..." when every container is itself a taxon.
##   - "incorrect number of tip taxa" when the tips found by row index and
##     the tips found by name disagree.
## Warnings:
##   - "<k> single descendent nodes will be collapsed" when k nodes have
##     exactly one child.
tc2ape <- function(tc) {
  taxon <- as.character(tc[, 1])
  container <- as.character(tc[, 2])

  # total nodes
  n <- length(taxon)

  # Row index of each taxon's parent; NA marks a container that is not
  # itself a taxon, i.e. the row is a root.
  ancestor.id <- match(container, taxon)
  root <- which(is.na(ancestor.id))

  # check: exactly one root is required
  if (length(root) > 1L) {
    stop("more than one root (missing ancestor or taxon)")
  }
  if (length(root) == 0L) {
    stop("no root found (every container is itself a taxon)")
  }

  # Count children per node, looking only at containers that are real taxa
  # so the root's own (missing/empty/unlisted) container is never counted.
  child.counts <- table(container[!is.na(ancestor.id)])
  n.single <- sum(child.counts == 1L)
  if (n.single > 0L) {
    warning(n.single, " single descendent nodes will be collapsed")
  }

  # find tips versus internals: a row is internal if some other row names
  # it as parent (sort() drops the root's NA)
  internal <- sort(unique(ancestor.id))
  tips <- which(!(seq_len(n) %in% internal))

  # check: tips found by index must agree with tips found by name
  if (sum(!taxon %in% container) != length(tips)) {
    stop("incorrect number of tip taxa")
  }
  internal <- internal[internal != root]

  # numeric ids for edge table: tips are 1..n.tip, the root is n.tip + 1,
  # remaining internal nodes follow
  n.tip <- length(tips)
  new_ids <- integer(n)
  new_ids[tips] <- seq_along(tips)
  new_ids[root] <- n.tip + 1L
  new_ids[internal] <- n.tip + 1L + seq_along(internal)

  # One edge (parent id, child id) per non-root row, kept in row order
  new_ancestor <- new_ids[ancestor.id]
  has.parent <- !is.na(new_ancestor)
  edge <- matrix(c(new_ancestor[has.parent], new_ids[has.parent]), ncol = 2L)

  tree <- list(edge = edge, Nnode = n - n.tip, tip.label = taxon[tips])
  class(tree) <- "phylo"

  collapse.singles(tree)
}

## example
## NOTE(review): 'ERODIUM CICUTARIUM' is listed twice, which gives two tips
## sharing one label although taxon ids are described as unique - confirm
## this is intended.
taxa <- c('ERODIUM CICUTARIUM', 'ERODIUM CICUTARIUM', 'ERODIUM BOTRYS',
          'GERANIUM MOLLE', 'GERANIUM CAROLINIANUM',
          'PELARGONIUM ALCHEMILLOIDES', 'ERODIUM', 'GERANIUM', 'EroGer',
          'Geraniaceae')
container <- c('ERODIUM', 'ERODIUM', 'ERODIUM', 'GERANIUM', 'GERANIUM',
               'Geraniaceae', 'EroGer', 'EroGer', 'Geraniaceae', NA)
(tc <- cbind(taxa, container))
(tr <- tc2ape(tc))
plot(tr)
write.tree(tr)