/* This is the main source code for the simulations described in the article by Louca et al. - Detecting cyclicity in ecological time series The code is a Monte Carlo simulation of the models. For each run the periodogram is calculated and evaluated as described in the article. Summary statistics of this process are printed out at the end. Model parameters and simulation settings are set in the source file Config.h. Written by Stilianos Louca, December 21, 2013 */ #include #include #include #include #include #include "Config.h" #include "InternalDefs.h" #include "Points.h" #include "LombScargleSpectrum.h" #include "StochasticRungeKutta.h" #include "LogisticGrowthModel.h" #include "GompertzGrowthModel.h" #include "OrnsteinUhlenbeckModel.h" #include "Auxiliary.cpp" #include "PeakSignificance.cpp" //#include "Grid3DInterpolator.h" #include "GridMultilinearInterpolator.h" #include "STPlot/sources/STPlot.h" using namespace std; using namespace ST; #pragma mark - #pragma mark Main #pragma mark - int main(int argc, char* argv[]){ long n,m,k,mode,series_size; bool model_is_cyclic, shortcutted_simulation; ofstream fout, sub_log; string fileBaseName, model_description, category; vector times,spectrum,signal; vector signalTemp; double error_SD, alpha, expected_power_o, expected_lambda, frequency, df, dummyD, dummyD1, dummyD2; double significanceN, significanceOU, significanceWN, fitted_power_o_OU, fitted_theta_OU, fitted_power_e_OU, fitted_power_WN; long failedSpectrumRejects, lowModeRejects, averagedPeriodograms, failedSignificanceRejects, peakNotRealFreqRejects, failedSetup; long alpha_epsilon_bin, countExamined, countCyclic[2][2], bin_size; //countCyclic[a][b] for a=1 iff non-OU, b=1 iff non-WN int cyclicByOU, cyclicByWN; LGDynamics LGmodel; OUDynamics OUmodel; vector eFAP_CDF_corrected, eFAP_CDF_uncorrected, eFAP_CDF_uncorrected_binnedByRho_and_NSR; GridMultilinearInterpolator CDFInterpolator, FAPcorrector; string CDFgrid_output_dir, all_plots_script; const vector FAP_correction_grid_eFAPs = string2vector(FAP_CORRECTION_GRID_eFAPs); const vector FAP_correction_grid_rhos = string2vector(FAP_CORRECTION_GRID_RHOs); const vector FAP_correction_grid_NSRs = string2vector(FAP_CORRECTION_GRID_NSRs); const long gridN_rhos = FAP_correction_grid_rhos.size(); const long gridN_NSRs = FAP_correction_grid_NSRs.size(); const long gridN_eFAPs = FAP_correction_grid_eFAPs.size(); vector estimatedPvaluesOU, estimatedPvaluesOUuncorrected; vector > > estimatedPvaluesOUuncorrected_binnedByRho_and_NSR(gridN_rhos, vector >(gridN_NSRs)); if((FAP_CDF_CORRECTION!="") && USE_STATE_SPACE_MODEL){ cout << " Found FAP corrector for OUSS model: Will be correcting eFAPs\n"; } if(FAP_CDF_CORRECTION!=""){ if(!FAPcorrector.load(4,FAP_CDF_CORRECTION)){ cout << " ERROR: Could not load FAP_CDF_CORRECTION string\n"; return 0; } } //debug /* vector dtimes = string2vector("0 0.5 1 1.5 2 2.5 3 3.5 4 4.5 5 5.5 6 6.5 7 7.5 8 8.5 9 9.5 10 10.5 11 11.5 12 12.5 13 13.5 14 14.5 15 15.5 16 16.5 17 17.5 18 18.5 19 19.5 20 20.5 21 21.5 22 22.5 23 23.5 24 24.5 25 25.5 26 26.5 27 27.5 28 28.5 29 29.5 30 30.5 31 31.5 32 32.5 33 33.5 34 34.5 35 35.5 36 36.5 37 37.5 38 38.5 39 39.5 40 40.5 41 41.5 42 42.5 43 43.5 44 44.5 45 45.5 46 46.5 47 47.5 48 48.5 49 49.5 50"); vector dvalues = string2vector("0.941695 1.10381 0.927612 1.06133 0.724401 0.818952 0.833921 0.789225 1.06215 1.16206 0.862879 0.864727 1.35531 1.50136 1.09196 1.1722 1.20303 0.945381 0.828455 0.755115 1.06531 0.811853 0.445822 0.748415 0.749771 0.780996 1.0017 1.12162 1.2175 0.780049 0.910242 1.16985 1.35972 1.03736 1.13655 1.1875 0.810759 0.863924 0.96702 0.96132 0.814535 1.04363 0.743377 0.876182 0.968381 1.09869 1.12217 0.95409 0.716318 0.888672 1.07044 1.11015 0.927606 0.886407 0.93287 1.2959 1.47829 0.997813 1.25022 0.76487 1.06667 1.62357 1.04338 0.859184 1.12778 0.939522 0.717104 0.748877 0.704865 0.853298 1.26531 1.2543 1.23894 0.953348 1.06061 0.935198 1.37771 1.10347 1.05327 1.16348 1.03826 1.08092 0.950312 1.16824 1.27124 0.889717 0.659243 0.74293 1.15556 0.952268 1.18929 1.08938 0.800359 0.69744 0.918542 0.703612 1.05833 1.29011 1.20261 1.02755 1.05131"); //calculate periodogram of sample path long N = dtimes.size(); double T = (dtimes.back()-dtimes.front()), meandt = T/(N-1); if(!LombScargleSpectrum(dvalues, dtimes, 0, dtimes.size()-1, 0, NULL, NULL, &spectrum, df)){ cout << "error LSS\n"; return 0; } //get significance bool dummyB; if(!significanceOfSpectralPeak_OUSS(df, dtimes, dvalues, spectrum, min_mode, dummyD1, dummyD2, 1, 1, 1, significanceOU, fitted_theta_OU, fitted_power_o_OU, fitted_power_e_OU, significanceWN, fitted_power_WN, dummyB)){ cout << "error signif\n"; return 0; } const double fitted_rho = exp(-fitted_theta_OU*meandt); const double fitted_NSR = fitted_power_e_OU/fitted_power_o_OU; double fitted_sigma = sqrt((fitted_power_o_OU/meandt)*(1-exp(-fitted_theta_OU*meandt))/(1+exp(-fitted_theta_OU*meandt)));; double fitted_epsilon = sqrt(fitted_power_e_OU/meandt); double signifCorrected = FAPcorrector.interpolate(0, (double) dtimes.size(), (double) fitted_rho, (double) fitted_NSR, (double) significanceOU); cout << " debug: fitted_rho=" << fitted_rho << ", fitted_NSR=" << fitted_NSR << ", fitted_lambda=" << fitted_theta_OU << ", fitted_power_o=" << fitted_power_o_OU << ", fitted_power_e=" << fitted_power_e_OU << "\n" << " df=" << df << ", dt=" << meandt << ", significanceWN=" << significanceWN << ", fitted_power_WN=" << fitted_power_WN << "\n" << " fitted sigma=" << fitted_sigma << ", fitted_epsilon=" << fitted_epsilon << "\n" << " significanceOU=" << significanceOU << ", corrected=" << signifCorrected << endl; return 0; */ //seed random number generator srand(time(0)); //figure out time series sizes to consider vector series_sizes; if(BUILD_FAP_CDF_GRID){ if(simul_alpha_max>EPSILON*simul_K){ cout << " Warning: alpha_max>0 (i.e. model will be cyclic), but CDF grid calculation requested for FAP estimator. This makes little sense..\n"; } string2vector(FAP_CORRECTION_GRID_TSS, series_sizes); CDFInterpolator.setLayerPoints(4, FAP_CORRECTION_GRID_RHOs, FAP_CORRECTION_GRID_NSRs, FAP_CORRECTION_GRID_eFAPs); CDFgrid_output_dir = getNextNonExistingChild(OUTPUT_DIR, "FAP_CDF_calculation_", "", 2); }else{ series_sizes.push_back(default_series_size); } for(long s=0; s >(gridN_NSRs)); estimatedPvaluesOUuncorrected.clear(); estimatedPvaluesOU.clear(); countExamined = 0; countCyclic[0][0]=countCyclic[0][1]=countCyclic[1][0]=countCyclic[1][1]=0; failedSpectrumRejects=lowModeRejects=failedSignificanceRejects=peakNotRealFreqRejects=failedSetup=averagedPeriodograms=0; //figure out output directory and files string output_subdir = (BUILD_FAP_CDF_GRID ? CDFgrid_output_dir+"/TS_"+makeString(series_size) : getNextNonExistingChild(OUTPUT_DIR, "run_", "", 2)); createDirectory(output_subdir); all_plots_script = (BUILD_FAP_CDF_GRID ? CDFgrid_output_dir : output_subdir)+"/all_plots_script.sh"; if(s==0){ //make a copy of the config file (if available) copySilently("source/Config.h", (BUILD_FAP_CDF_GRID ? CDFgrid_output_dir : output_subdir)+"/Config.h"); } if(BUILD_FAP_CDF_GRID){ cout << " Performing statistics for time series size " << series_size << "\n"; } cout << " Output will be written to: " << output_subdir << "\n"; // write some feedback const string logFileName = output_subdir + string("/log.txt"); openOutputFile(logFileName, sub_log, cout); sub_log << comment_prefix << " " << MODEL << " model: " << model_description << " (with variable parameters, see below)\n" << comment_prefix << " r = " << simul_r << (USE_OUSS_RHO_INSTEAD_OF_R && (MODEL=="OU") ? " (not relevant)" : "") << "\n" << comment_prefix << " OUSS rho = [" << simul_OUSS_rho_min << ", " << simul_OUSS_rho_max << "]" << (USE_OUSS_RHO_INSTEAD_OF_R && (MODEL=="OU") ? "" : " (not relevant)") << "\n" << comment_prefix << " K = " << simul_K << "\n" << comment_prefix << " SD, sigma = [" << simul_sigma_min << ", " << simul_sigma_max << "]" << (SCALE_LG_NOISE && (MODEL=="LG") ? " at equilibrium, but proportional to N/K in general" : "") << "\n" << comment_prefix << " cycle amplitude, alpha = [" << simul_alpha_min << ", " << simul_alpha_max << "]" << (simul_alpha_relative2SD ? " relative to sigma" : "") << "\n" << comment_prefix << " period, T = [" << simul_T_min << ", " << simul_T_max << "]\n" << comment_prefix << " epsilon = " << (USE_STATE_SPACE_MODEL ? stringprintf("[%.3g, %.3g]%s (state space model)", double(simul_epsilon_min), double(simul_epsilon_max), (simul_epsilon_relative2SD ? " relative to sigma" : "")) : "0") << "\n" << comment_prefix << " simulation time = " << simulation_time << "\n" << comment_prefix << " time series size = " << series_size << "\n" << comment_prefix << " frequency tolerance = " << frequency_tolerance << "\n" << comment_prefix << " min mode = " << min_mode << "\n" << comment_prefix << " OU significance threshold = " << significance_level_OU << "\n" << comment_prefix << " WN significance threshold = " << significance_level_WN << "\n" << comment_prefix << " FAP estimation using " << SIGNIF_BERNOULLI_TRIALS << " Bernoulli trials\n" << comment_prefix << " Monte Carlo count = " << Monte_Carlo_count << "\n"; if(MODEL=="OU" && SHORTCUT_OUSS_SIMULATIONS){ sub_log << comment_prefix << " Time series generated using correlated draws\n"; }else{ sub_log << comment_prefix << " Runge-Kutta integration time step = " << time_step << "\n"; } sub_log << flush; long count, totalCount = Monte_Carlo_count; Scanner2D alpha_epsilon_scanner; alpha_epsilon_scanner.setup(2, totalCount, max(1,simul_alpha_grid_size), max(1,simul_epsilon_grid_size), simul_alpha_min, simul_alpha_max, simul_epsilon_min, simul_epsilon_max); long feedbackPeriod = max((long)1, (long) ((1.0*totalCount) / feedback_count)); //Generate time series for(count=1, n=0; n(signal, times, 0, times.size()-1, 0, NULL, NULL, &spectrum, df)){ ++failedSpectrumRejects; continue; } //analyze periodogram mode = maximumIndex(spectrum, 1, spectrum.size()-1); frequency = df*mode; bool reject = false; if((simul_alpha_max>EPSILON) && (frequency_tolerance*frequency0)){ ++peakNotRealFreqRejects; reject = true; category = "_false_frequency"; }else if(mode < min_mode){ ++lowModeRejects; reject = true; category = "_low_mode"; }else{ if(USE_STATE_SPACE_MODEL){ bool averagedPeriodogram; if(!significanceOfSpectralPeak_OUSS(df, times, signal, spectrum, min_mode, dummyD1, dummyD2, OUSS_lambda, // provide true params if needed for debugging sigma, // provide true params if needed for debugging error_SD, // provide true params if needed for debugging significanceOU, fitted_theta_OU, fitted_power_o_OU, fitted_power_e_OU, significanceWN, fitted_power_WN, averagedPeriodogram)){ ++failedSignificanceRejects; if(averagedPeriodogram) ++averagedPeriodograms; reject=true; category = "_failed_fit"; } }else{ if(!significanceOfSpectralPeak_OU(df, T, spectrum, min_mode, dummyD1, dummyD2, significanceOU, fitted_power_o_OU, fitted_theta_OU, significanceWN, fitted_power_WN)){ ++failedSignificanceRejects; reject=true; category = "_failed_fit"; } fitted_power_e_OU = 0; } } const double fitted_rho = exp(-fitted_theta_OU*meandt); const double fitted_NSR = fitted_power_e_OU/fitted_power_o_OU; // count towards statistics if(!reject){ const long rho_bin = GridMultilinearInterpolator::findOn1DGrid(FAP_correction_grid_rhos, fitted_rho); const long NSR_bin = GridMultilinearInterpolator::findOn1DGrid(FAP_correction_grid_NSRs, fitted_NSR); estimatedPvaluesOUuncorrected_binnedByRho_and_NSR[max(0l,min(gridN_rhos-1,rho_bin))][max(0l,min(gridN_NSRs-1,NSR_bin))].push_back(significanceOU); estimatedPvaluesOUuncorrected.push_back(significanceOU); if((FAP_CDF_CORRECTION!="") && USE_STATE_SPACE_MODEL){ //correct OUSS FAP using pre-calculated correction grid significanceOU = FAPcorrector.interpolate(0, (double) series_size, (double) fitted_rho, (double) fitted_NSR, (double) significanceOU); } cyclicByOU = (significanceOU < significance_level_OU); cyclicByWN = (significanceWN < significance_level_WN); category = string(cyclicByOU==1 ? "_OU" : "_") + (cyclicByWN==1 ? "_WN" : "_"); ++countExamined; ++countCyclic[(cyclicByOU ? 1 : 0)][(cyclicByWN ? 1 : 0)]; estimatedPvaluesOU.push_back(significanceOU); alpha_epsilon_scanner.add_values_to_bin(alpha_epsilon_bin, (cyclicByOU ? 1.0 : 0.0), (cyclicByWN ? 1.0 : 0.0)); } //also calculate averaged periodogram (if applicable) bool averaged_spectrum = false; long N_averaged = N; double dt_averaged = delta, df_averaged=df; vector spectrum_averaged; if(PS_MIN_SERIES_SEGMENT_SIZE_FOR_AVERAGED_PERIODOGRAM>1){ // starting from non-averaged periodogram fit, try to fit averaged periodogram const long segmentsAim = min(N/max(1,PS_MIN_SERIES_SEGMENT_SIZE_FOR_AVERAGED_PERIODOGRAM), (long)((-log(PS_MIN_RHO_FOR_AVERAGING)/fitted_theta_OU)*(N-PS_MAX_OMITTED_DATA_POINTS_IN_AVERAGING)/T)); if(segmentsAim>=PS_MIN_NUMBER_OF_SEGMENTS_FOR_AVERAGING){ long segmentsAchieved; if(averagedLombScargleSpectrum( signal, times, 0, times.size()-1, segmentsAim, PS_MIN_NUMBER_OF_SEGMENTS_FOR_AVERAGING, 2, -1, segmentsAchieved, spectrum_averaged, df_averaged, dt_averaged, N_averaged)){ averaged_spectrum = true; // correct for different effective delta for(long m=0; m Classified as not " << (cyclicByOU==1 ? "OU" : "..") << (cyclicByWN==1 ? ",WN" : ",..") << "\n"; } //time series fout << comment_prefix << "Data index 0\n" << comment_prefix << "Data columns: time population_size\n"; for(m=0; m tempV; for(long r=0; r0 ? makeString(peakNotRealFreqRejects) + " peak not real freq" : "no frequency filtering") << ", " << averagedPeriodograms << " periodograms were averaged for fitting\n" << comment_prefix << " Of " << countExamined << " examined, cyclic by:\n" << comment_prefix << " OU: " << cOU << " (" << (100.0*cOU)/countExamined << "%)\n" << comment_prefix << " WN: " << cWN << " (" << (100.0*cWN)/countExamined << "%)\n" << comment_prefix << " OU & WN: " << cOUWN << " (" << (100.0*cOUWN)/countExamined << "%)\n"; //cycle detection rate over varying alpha if(simul_alpha_grid_size>1){ sub_log << "\n\n" << comment_prefix << " Detected cycles by OU & WN for varying alpha" << (simul_alpha_relative2SD ? " (relative to OU SD)" : "") << ", averaged over all epsilon\n" << comment_prefix << " Data columns: alpha\t\tOU-cycles(fraction)\t\tWN-cycles(fraction)\t\tN\n"; for(long i=0; i1)){ sub_log << "\n\n" << comment_prefix << " Detected cycles by OU & WN for varying epsilon" << (simul_epsilon_relative2SD ? " (relative to OU SD)" : "") << ", averaged over all alpha\n" << comment_prefix << " Data columns: epsilon\t\tOU-cycles(fraction)\t\tWN-cycles(fraction)\t\tN\n"; for(long i=0; i1))){ sub_log << "\n\n" << comment_prefix << " Detected cycles by OU & WN for varying alpha" << (simul_alpha_relative2SD ? " (relative to OU SD)" : "") << " and/or epsilon" << (simul_epsilon_relative2SD ? " (relative to OU SD)" : "") << "\n" << comment_prefix << " Data columns: alpha\t\tepsilon\t\tOU-cycles(fraction)\t\tWN-cycles(fraction)\t\tN\n"; for(long r=0; r=" << FAP_correction_grid_rhos[r] << ", NSR>=" << FAP_correction_grid_NSRs[n] << ": " << estimatedPvaluesOUuncorrected_binnedByRho_and_NSR[r][n].size() << "\n"; } } sub_log << comment_prefix << " Data columns: fitted_rho fitted_NSR eFAP CDF\n"; for(long r=0; r1){ plot.title = "Cycle detection rate over $alpha$"; plot.axisLabels = MultiLabel("$alpha$", "fraction"); plot.range = MultiRange(Range(), Range(0,1)); plot.sources.push_back(PlotSource(PlotType2DCurve, "OU", logFileName, index, 1, 2)); plot.sources.push_back(PlotSource(PlotType2DCurve, "WN", logFileName, index, 1, 3)); multiPlot.plots.push_back(plot); ++index; } //cycle detection over epsilon if(USE_STATE_SPACE_MODEL && (simul_epsilon_grid_size>1)){ plot.title = "Cycle detection rate over $epsilon$"; plot.axisLabels = MultiLabel("$epsilon$", "fraction"); plot.range = MultiRange(Range(), Range(0,1)); plot.sources.clear(); plot.sources.push_back(PlotSource(PlotType2DCurve, "OU", logFileName, index, 1, 2)); plot.sources.push_back(PlotSource(PlotType2DCurve, "WN", logFileName, index, 1, 3)); multiPlot.plots.push_back(plot); ++index; } if(simul_alpha_grid_size && (USE_STATE_SPACE_MODEL && (simul_epsilon_grid_size>1))) ++index; //empirical CDF for uncorrected eFAPs plot.title = "Empirical CDF for estimated (uncorrected) eFAPs"; plot.axisLabels = MultiLabel("eFAP", "CDF"); plot.range = MultiRange(Range(), Range()); plot.sources.clear(); plot.xLogarithmic=plot.yLogarithmic=true; plot.sources.push_back(PlotSource(PlotType2DAnalytic, "ideal", "x", 0)); plot.sources.back().setColor(Color(0.5,0.5,0.5)); plot.sources.push_back(PlotSource(PlotType2DCurve, "FAP CDF", logFileName, index, 1, 2)); plot.sources.back().setColor(Color(0.9,0,0)); multiPlot.plots.push_back(plot); ++index; ++index; //empirical CDF for corrected eFAPs plot.title = "Empirical CDF for estimated (corrected) eFAPs"; plot.axisLabels = MultiLabel("eFAP", "CDF"); plot.range = MultiRange(Range(), Range()); plot.sources.clear(); plot.xLogarithmic=plot.yLogarithmic=true; plot.sources.push_back(PlotSource(PlotType2DAnalytic, "ideal", "x", 0)); plot.sources.back().setColor(Color(0.5,0.5,0.5)); plot.sources.push_back(PlotSource(PlotType2DCurve, "FAP CDF", logFileName, index, 1, 2)); plot.sources.back().setColor(Color(0,0,0.8)); multiPlot.plots.push_back(plot); ++index; ++index; //plot string errorMessage; plotWithLog(multiPlot, output_subdir+"/summary_plot_script.gp", output_subdir+"/summary_plot.pdf", output_subdir+"/summary_plot.svg", "summary", NULL, false, all_plots_script, errorMessage); } } //summarize CDF estimation and write to log if(BUILD_FAP_CDF_GRID){ const string superlogFileName = CDFgrid_output_dir+"/log.txt"; if(openOutputFile(superlogFileName, fout, cout)){ fout << comment_prefix << " CDF estimation for estimated, raw (uncorrected) FAPs (eFAP)\n" << comment_prefix << " CDF specified on a TSS x fitted_rho x fitted_NSR x eFAP grid\n" << comment_prefix << " Time series sizes (TSS): " << FAP_CORRECTION_GRID_TSS << "\n" << comment_prefix << " rhos (correlation between time steps): " << FAP_CORRECTION_GRID_RHOs << "\n" << comment_prefix << " NSRs (noise-to-signal ratio): " << FAP_CORRECTION_GRID_NSRs << "\n" << comment_prefix << " eFAPs: " << FAP_CORRECTION_GRID_eFAPs << "\n"; fout << "\n\n" << comment_prefix << " Index 0\n" << comment_prefix << " As single line string:\n" << CDFInterpolator.getGridAsString() << "\n"; fout << "\n\n" << comment_prefix << " Index 1\n" << comment_prefix << " As scan\nData columns: TSS fitted_rho fitted_NSR eFAP CDF\n"; CDFInterpolator.printGridScann(fout); fout.close(); } } return 0; }