# This R code tests all time series in the GPDD for cyclicity vs the OUSS null model as well as white noise. # Execute using source("GPDD_main.R") in the console. # GPDD should be in the form of separate files, one for each time series (specified below) # Each time series file name should begin with "GPDD_" # Comments in time series should be preceeded by '#' # Time series should consist of two comma-separated columns (time and signal values). # No other uncommented text is allowed. # You can mess with some of the parameters of the analysis below. # The actual peak detection and statistical evaluation is done using the peacots package, available on CRAN. # # Stilianos Louca # October 26, 2014 library(peacots) ##################################### # Options minDataSize = 25; # Ignore GPDD time series of smaller size OUSSsignificanceLevel = 0.05; WNsignificanceLevel = 0.05; GPDDdir = "GPDD"; # Directory with GPDD time series files GPDDfilePrefix = "GPDD_"; # Prefix of individual GPDD time series files plotTimeSeries = TRUE; # Whether to plot and save a detailed report of each analyzed time series. log_transform_data = FALSE; # Whether to log-transform time series data. If true, then time series with non-positive entries will be skipped. detrend_WN = TRUE; # Whether to detrend time series before testing against WN (not relevant for OUSS) minPeakFreq_OUSS = 0.0; # Only search for periodogram peak among frequencies greater or equal to this threshold. Lower-frequency modes are omitted. Only used for OUSS test. Note that zero-mode is always discarded. minFitFreq_OUSS = 0.0; # Fit OUSS & WN models using only frequencies greater or equal to this threshold. Only used for OUSS test. Note that zero-mode is always discarded. minPeakFreq_WN = 0.0; # Similar to minPeakFreq_OUSS, but for WN test. Note that zero-mode is always discarded. minFitFreq_WN = 0.0; # Similar to minFitFreq_OUSS, but for WN test. Note that zero-mode is always discarded. skipSingleSidesPeaks_WN = TRUE; # If true, then cases where the periodogram peak is not trully a peak (i.e. the next-lowest frequency does not have lower power), are skipped skipSingleSidesPeaks_OUSS = TRUE; # ########################################### #### Auxiliary functions getNonExistingFile = function(parentDir, baseName){ DEFAULT_NUMWIDTH = 2; n = 0; while(TRUE){ n = n+1; cfile = paste0(parentDir, "/", baseName, formatC(n, width=DEFAULT_NUMWIDTH, flag="0")); if(!file.exists(cfile)){ return(cfile); } } } linearExtrapolate = function(x1, x2, y1, y2, x){ return(y1 + (y2-y1)*(x-x1)/(x2-x1)); } # Calculate cumulative distribution function (CDF) from samples # All Xvalues on the left of all samples will be assigned a CDF=0, all Xvalues to the right of all samples will be assigned a CDF=1. # Returns a vector of same size as Xvalues[] with corresponding CDF values getEmpiricalCumulativeDF = function(samples, # (input) vector of samples (x-values) from which to estimate cumulative distribution function. Samples might get re-ordered. Xvalues){ # (input) vector of X-values for which to estimate CDF. Must be in increasing order. NS = length(samples); NX = length(Xvalues); CDFvalues = rep(0,NX); samples = sort(samples); xi = 1; for(si in 1:NS){ if(xi>NX) break; while(samples[si]>Xvalues[xi]){ if(si==1){ CDFvalues[xi] = 0; }else{ CDFvalues[xi] = linearExtrapolate(samples[si-1], samples[si], (si-1)/NS, si/NS, Xvalues[xi]); } xi = xi+1; if(xi>NX) break; } } if(xi<=NX){ for(i in xi:NX){ CDFvalues[i] = 1; } } return(CDFvalues); } # Function for plotting peak analysis results for a particular time series plotPeakAnalysis = function(name, times, signal_raw, signal_OUSS, signal_WN, peak_OUSS, peak_WN, dont_show_OUSS_fit, dont_show_WN_fit, plotDir){ old.par <- par(mfrow=c(2, 3)) delta = (tail(times,n=1)-times[1])/(length(times)-1); # time step #plot time series plot(ts(times), ts(signal_raw), xy.label=FALSE, type="l", ylab="Value", xlab="time", main=sprintf("Time series raw (%s)", name), cex=0.8, cex.main=0.9) # OUSS #plot time series OUSS plot(ts(times), ts(signal_OUSS), xy.label=FALSE, type="l", ylab="Value", xlab="time", main=sprintf("Time series\n(used for OUSS analysis)"), cex=0.8, cex.main=0.9) if(!peak_OUSS$error){ #plot periodogram OUSS plot(ts(peak_OUSS$frequencies), ts(peak_OUSS$periodogram), xy.label=FALSE, type="l", ylab="Power", xlab="frequency", main=ifelse(dont_show_OUSS_fit!="", sprintf("Power spectrum (%s)\n(%s)", name, dont_show_OUSS_fit), sprintf("Power spectrum (%s)\n(fmax=%.3g, POUSS=%.2g)", name,peak_OUSS$frequencies[peak_OUSS$peakMode],peak_OUSS$P)), col="black", cex=0.8, cex.main=0.9); if(dont_show_OUSS_fit==""){ #plot fitted OUSS periodogram lines(peak_OUSS$frequencies[peak_OUSS$minFitMode:length(peak_OUSS$frequencies)], peak_OUSS$fittedPS[peak_OUSS$minFitMode:length(peak_OUSS$fittedPS)], col="red"); #plot legend legend((0.6*peak_OUSS$frequencies[1]+0.4*tail(peak_OUSS$frequencies,1)), (0.85*max(peak_OUSS$periodogram)), c("periodogram", "fitted OUSS"), lty=c(1,1), col=c("black", "red"), bty="n", cex=0.8) } } # WN #plot time series WN plot(ts(times), ts(signal_WN), xy.label=FALSE, type="l", ylab="Value", xlab="time", main=sprintf("Time series\n(used for WN analysis)"), cex=0.8, cex.main=0.9) if(!peak_WN$error){ #plot periodogram WN plot(ts(peak_WN$frequencies), ts(peak_WN$periodogram), xy.label=FALSE, type="l", ylab="Power", xlab="frequency", main=ifelse(dont_show_WN_fit!="", sprintf("Power spectrum (%s)\n(%s)", name, dont_show_WN_fit), sprintf("Power spectrum (%s)\n(fmax=%.3g, PWN=%.2g)", name,peak_WN$frequencies[peak_WN$peakMode],peak_WN$P)), col="black", cex=0.8, cex.main=0.9); if(dont_show_WN_fit==""){ #plot fitted white noise periodogram lines(c(peak_WN$frequencies[peak_WN$minFitMode],tail(peak_WN$frequencies,1)), c(peak_WN$powerEstimate, peak_WN$powerEstimate ), col="blue"); #plot legend legend((0.6*peak_WN$frequencies[1]+0.4*tail(peak_WN$frequencies,1)), (0.85*max(peak_WN$periodogram)), c("periodogram", "fitted WN"), lty=c(1,1), col=c("black", "blue"), bty="n", cex=0.8) } } par(old.par) #save plot as PDF if(plotDir != FALSE){ dir.create(plotDir, showWarnings=FALSE); dev.copy2pdf(file=paste(plotDir, paste0(filename,"_plot.pdf"), sep="/")); # write data to file dataFile = paste(plotDir, paste0(filename,"_data.txt"), sep="/"); cat(sprintf("# OUSS and WN analysis of times series %s\n# times\tsignal_raw\tsignal_OUSS\tsignal_WN\n", name), file=dataFile, append=FALSE); write.table(data.frame(times, signal_raw, signal_OUSS, signal_WN), , file=dataFile, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE, sep="\t"); cat(sprintf("\n\n# periodogram of time series used for OUSS test\n# frequency\tpower\n"), file=dataFile, append=TRUE); write.table(data.frame(peak_OUSS$frequencies, peak_OUSS$periodogram), , file=dataFile, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE, sep="\t"); cat(sprintf("\n\n# periodogram of time series used for WN test\n# frequency\tpower\n"), file=dataFile, append=TRUE); write.table(data.frame(peak_WN$frequencies, peak_WN$periodogram), , file=dataFile, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE, sep="\t"); } } ############################################# #### GPDD analysis #Look for GPDD time series files dataFiles = list.files(path=paste(getwd(),GPDDdir,sep="/"), pattern=sprintf("^%s", GPDDfilePrefix), all.files=FALSE, full.names=FALSE, recursive=FALSE, ignore.case=TRUE); # create new output dir and save copy of this script for future reference outputDir = getNonExistingFile(paste0(dirname(GPDDdir), "/GPDD_runs"), "run_"); cat(sprintf("Analyzing GPDD.. All output will be written to '%s'\n", outputDir)); dir.create(outputDir, showWarnings=FALSE, recursive=TRUE); file.copy(from="GPDD_main.R", to=outputDir, overwrite=TRUE); plotDir = paste0(outputDir,"/plots"); dir.create(plotDir, showWarnings=FALSE, recursive=TRUE); logFile = paste0(outputDir,"/log.txt"); scatterFile = paste0(outputDir,"/scatter_data.txt"); #Read GPDD time series from individual files and test each one for cyclicity tooLowFreq_OUSS=0; errors_OUSS=0; errors_WN=0; cOUSS=cWN=0; failedLogTransform=0; tooSmall=0; peakAtFirstMode_OUSS=0; peakOnlySingleSided_WN=0; peakOnlySingleSided_OUSS=0; FAPs_OUSS=c(); FAPs_WN = c(); count = 0; totalCount = length(dataFiles); feedbackPeriod = max(1, floor(totalCount/20)); dont_show_OUSS_fit=""; dont_show_WN_fit=""; OUSS_WN_RESULTS_TABLE = NULL; for(filename in dataFiles){ if(((count %% feedbackPeriod) == 0) && (count>0)){ cat(sprintf("\n %.2g%% finished..\n", (100.0 * count)/totalCount)); } count = count+1; counted_this_OUSS = counted_this_WN = FALSE; #Load particular time series from data file timeSeries = read.csv(paste(GPDDdir,filename,sep="/"), header=FALSE, sep=",", comment.char="#"); times = timeSeries[,1]; signal_raw = timeSeries[,2]; time_step = (tail(timeSeries[,1],n=1)-timeSeries[1,1])/(length(timeSeries[,1])-1); #ignore time series if too small if(length(times)=peak_OUSS$periodogram[peak_OUSS$peakMode]) || (peak_OUSS$peakMode==length(peak_OUSS$frequencies))); if(temp_peakOnlySingleSided_OUSS){ peakOnlySingleSided_OUSS = peakOnlySingleSided_OUSS + 1; } if((peak_OUSS$P=peak_WN$periodogram[peak_WN$peakMode]) || (peak_WN$peakMode==length(peak_WN$frequencies))); if(temp_peakOnlySingleSided_WN) peakOnlySingleSided_WN = peakOnlySingleSided_WN + 1; if((peak_WN$P