diff --git a/.xo-statistics/.gitignore b/.xo-statistics/.gitignore deleted file mode 100644 index 13c0afb7..00000000 --- a/.xo-statistics/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -# clangd working space (see emacs+lsp) -.cache -# typical cmake build directory (source-tree-nephew) -.build* -# symlink to builddir/compile_commands.json; should be set manually in dev sandbox -compile_commands.json diff --git a/.xo-statistics/CMakeLists.txt b/.xo-statistics/CMakeLists.txt deleted file mode 100644 index c01852a9..00000000 --- a/.xo-statistics/CMakeLists.txt +++ /dev/null @@ -1,39 +0,0 @@ -# xo-statistics/CMakeLists.txt - -cmake_minimum_required(VERSION 3.10) - -project(xo_statistics VERSION 1.0) - -include(GNUInstallDirs) -include(cmake/xo-bootstrap-macros.cmake) - -xo_cxx_toplevel_options3() - -# ---------------------------------------------------------------- -# bespoke (usually temporary) c++ settings - -set(PROJECT_CXX_FLAGS "") -#set(PROJECT_CXX_FLAGS "-fconcepts-diagnostics-depth=2") -add_definitions(${PROJECT_CXX_FLAGS}) - -# ---------------------------------------------------------------- - -#add_subdirectory(example) -#add_subdirectory(utest) - -# ---------------------------------------------------------------- -# output targets - -set(SELF_LIB xo_statistics) -xo_add_headeronly_library(${SELF_LIB}) - -# ---------------------------------------------------------------- -# standard install + provide find_package() support - -xo_install_library4(${SELF_LIB} ${PROJECT_NAME}Targets) -xo_export_cmake_config(${PROJECT_NAME} ${PROJECT_VERSION} ${PROJECT_NAME}Targets) - -# ---------------------------------------------------------------- -# install additional components - -#install(TARGETS statistics_ex1 DESTINATION bin/xo-statistics/example) diff --git a/.xo-statistics/cmake/xo-bootstrap-macros.cmake b/.xo-statistics/cmake/xo-bootstrap-macros.cmake deleted file mode 100644 index 592272c0..00000000 --- a/.xo-statistics/cmake/xo-bootstrap-macros.cmake +++ /dev/null @@ -1,41 +0,0 @@ -# ---------------------------------------------------------------- -# for example: -# $ PREFIX=/usr/local # for example -# $ cmake -DCMAKE_MODULE_PATH=prefix -DCMAKE_INSTALL_PREFIX=$PREFIX -B .build -# -# will get -# CMAKE_MODULE_PATH -# from xo-cmake-config --cmake-module-path -# -# and expect .cmake macros in -# CMAKE_MODULE_PATH/xo_macros/xo_cxx.cmake -# ---------------------------------------------------------------- - -find_program(XO_CMAKE_CONFIG_EXECUTABLE NAMES xo-cmake-config REQUIRED) - -if ("${XO_CMAKE_CONFIG_EXECUTABLE}" STREQUAL "XO_CMAKE_CONFIG_EXECUTABLE-NOT_FOUND") - message(FATAL "could not find xo-cmake-config executable") -endif() - -message(STATUS "XO_CMAKE_CONFIG_EXECUTABLE=${XO_CMAKE_CONFIG_EXECUTABLE}") - -if (XO_SUBMODULE_BUILD) - if (("${CMAKE_MODULE_PATH}" STREQUAL "") OR ("${CMAKE_MODULE_PATH}" STREQUAL prefix)) - # local version of xo-cmake macros - set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/xo-cmake/cmake") - message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}") - endif() -else() - if (("${CMAKE_MODULE_PATH}" STREQUAL "") OR ("${CMAKE_MODULE_PATH}" STREQUAL prefix)) - # default to typical install location for xo-project-macros - execute_process(COMMAND ${XO_CMAKE_CONFIG_EXECUTABLE} --cmake-module-path OUTPUT_VARIABLE CMAKE_MODULE_PATH) - message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}") - endif() -endif() - -# needs to have been installed somewhere on CMAKE_MODULE_PATH, -# (e.g. from xo-cmake with the same value for CMAKE_INSTALL_PREFIX) -# -include(xo_macros/xo_cxx) - -xo_cxx_bootstrap_message() diff --git a/.xo-statistics/cmake/xo_statisticsConfig.cmake.in b/.xo-statistics/cmake/xo_statisticsConfig.cmake.in deleted file mode 100644 index eb9a3b16..00000000 --- a/.xo-statistics/cmake/xo_statisticsConfig.cmake.in +++ /dev/null @@ -1,5 +0,0 @@ -@PACKAGE_INIT@ - -include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake") -include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Share.cmake") -check_required_components("@PROJECT_NAME@") diff --git a/.xo-statistics/include/xo/statistics/Accumulator.hpp b/.xo-statistics/include/xo/statistics/Accumulator.hpp deleted file mode 100644 index 69763e38..00000000 --- a/.xo-statistics/include/xo/statistics/Accumulator.hpp +++ /dev/null @@ -1,10 +0,0 @@ -/* @file Accumulator.hpp */ - -namespace xo { - nmaespace statistics { - class Accumulator { - }; /*Accumulator*/ - } /*namespace statistics*/ -} /*namespace xo*/ - -/* end Accumulator.hpp */ diff --git a/.xo-statistics/include/xo/statistics/Histogram.hpp b/.xo-statistics/include/xo/statistics/Histogram.hpp deleted file mode 100644 index d2219f23..00000000 --- a/.xo-statistics/include/xo/statistics/Histogram.hpp +++ /dev/null @@ -1,217 +0,0 @@ -/* @file Histogram.hpp */ - -#pragma once - -#include "statistics/SampleStatistics.hpp" -#include "logutil/scope.hpp" -#include -#include -#include - -namespace xo { - namespace statistics { - /* sample statistics for a histogram bucket - * (editorial: compare with distribution::Counter) - */ - class Bucket { - public: - Bucket() = default; - Bucket(uint32_t n_sample, double sum, double mean, double mom2) - : n_sample_(n_sample), sum_(sum), mean_(mean), moment2_(mom2) {} - - uint32_t n_sample() const { return n_sample_; } - double sum() const { return sum_; } - double mean() const { return mean_; } - double sample_variance() const { return (n_sample_ > 1) ? moment2_ / (n_sample_ - 1) : 0.0; } - double standard_error() const { return ::sqrt(this->sample_variance()); } - - /* to estimate standard error of the mean: - * 0. let nk = .n_sample be the #of samples falling into this bin. - * n is the total #of samples across all bins. - * (i.e. Histogram.n_sample) - * 1. imagine probability of a sample falling in this bin - * is the observed frequency p = (.n_sample / n) - * 2. imagine a Bernoulli random variable Bp(i) associated with each sample x(i) - * {1, with probability p; 0 with probability q=1-p}) - * 3. each Bp(i) has mean p, variance p(1-p) - * 4. sum of the Bp(1) .. Bp(n) has mean n.p = nk, - * variance - * n.p.(1-p) - * = n.(nk/n).(1 - nk/n) - * = nk.(1 - nk/n) - * (by central limit theorem we can treat this as approximately normal - * for sufficiently large n) - * 5. standard error of Sum{Bp(i)} - * will be - * sqrt(nk.(1 - nk/n)) - */ - double n_sample_stderr(uint32_t n) const { - double nr = 1.0 / n; - uint32_t nk = this->n_sample_; - - return ::sqrt(nk * (1.0 - nk * nr)); - } /*n_sample_stderr*/ - - /* add one sample, x, to this bucket */ - void include_sample(double x) { - using logutil::scope; - using logutil::xtag; - - constexpr char const * c_self = "Bucket::include_sample"; - constexpr bool c_logging_enabled = false; - - /* size of sample _before_ adding x */ - int n = this->n_sample_; - - this->n_sample_ = n+1; - this->sum_ += x; - - double mean_n = this->mean_; - double mom2_n = this->moment2_; - double mean_np1 = SampleStatistics::update_online_mean(x, n, mean_n); - double mom2_np1 = SampleStatistics::update_online_moment2(x, - mean_np1, mean_n, - mom2_n); - scope lscope(c_self, c_logging_enabled); - if(c_logging_enabled) { - lscope.log("update", - xtag("x", x), xtag("n", n), - xtag("sum", sum_), - xtag("mean(n)", mean_n), - xtag("mom2(n)", mom2_n), - xtag("mean(n+1)", mean_np1), - xtag("mom2(n+1)", mom2_np1)); - } - - this->mean_ = mean_np1; - this->moment2_ = mom2_np1; - } /*include_sample*/ - - private: - /* #of samples in this bucket (will be #of times .sample() has been called) */ - uint32_t n_sample_ = 0; - /* sum of samples in this bucket */ - double sum_ = 0.0; - /* mean of values in this bucket - * -- use online algo to avoid catastrophic errors for large #samples - */ - double mean_ = 0.0; - double moment2_ = 0.0; - }; /*Bucket*/ - - /* accumulate histogram on sampled data */ - class Histogram { - public: - using const_iterator = std::vector::const_iterator; - - public: - Histogram(uint32_t n_interior_bucket, double lo_bucket, double hi_bucket) - : n_interior_bucket_(n_interior_bucket), - lo_bucket_(lo_bucket), - hi_bucket_(hi_bucket), - bucket_v_(n_interior_bucket + 2) - {} - - uint32_t n_sample() const { return n_sample_; } - uint32_t n_bucket() const { return n_interior_bucket_ + 2; } - - double bucket_width() const { return (this->hi_bucket_ - this->lo_bucket_) / this->n_interior_bucket_; } - - const_iterator begin() const { return bucket_v_.begin(); } - const_iterator end() const { return bucket_v_.end(); } - Bucket const & lookup(uint32_t ix) const { return this->bucket_v_[ix]; } - - /* compute bucket representing pooled sample combining - * contents of buckets [lo .. hi) - */ - Bucket pooled(uint32_t lo, uint32_t hi) const { - /* NOTE: for pooled bucket, may want to compute "reliability variance", - * i.e. report - * M2 / (N - (sum(nk^2) / N)) - * instead of - * M2 / (N - 1) - */ - - uint32_t n_sample = 0; - double sum = 0.0; - double mean = 0.0; - double mom2 = 0.0; - - for(uint32_t i = lo; ilookup(i); - - n_sample += bucket.n_sample(); - /* note that sum is not numerically well-behaved if summing - * over a large #of buckets - */ - sum += bucket.sum(); - - double prev_mean = mean; - /* relative weight of bucket b(i) relative to pooled statistics - * from buckets b(lo) .. b(i-1) - */ - double wt = (bucket.n_sample() / static_cast(n_sample)); - - /* similar to SampleStatistics::update_online_mean() */ - mean = prev_mean + wt * (bucket.mean() - prev_mean); - /* similar to SampleStatistics::update_online_moment2() */ - mom2 = (mom2 + (bucket.n_sample() - * (bucket.mean() - prev_mean) - * (bucket.mean() - mean))); - } - - return Bucket(n_sample, sum, mean, mom2); - } /*pooled*/ - - double bucket_lo_edge(uint32_t ix) const { - if(ix == 0) { - return -std::numeric_limits::infinity(); - } else { - return this->lo_bucket_ + (ix - 1) * this->bucket_width(); - } - } /*bucket_lo_edge*/ - - double bucket_hi_edge(uint32_t ix) const { - if(ix < n_interior_bucket_ + 1) - return this->lo_bucket_ + ix * this->bucket_width(); - else - return std::numeric_limits::infinity(); - } /*bucket_hi_edge*/ - - /* index (into .bucket_v[]) of bucket to use for a sample with value x */ - uint32_t bucket_ix(double x) const { - if(x < this->lo_bucket_) - return 0; - - if(x < this->hi_bucket_) - return 1 + static_cast((x - this->lo_bucket_) / this->bucket_width()); - - return this->n_interior_bucket_ + 1; - } /*bucket_ix*/ - - void include_sample(double x) { - uint32_t ix = this->bucket_ix(x); - - ++(this->n_sample_); - this->bucket_v_[ix].include_sample(x); - } /*include_sample*/ - - private: - /* #of samples across all buckets */ - uint32_t n_sample_ = 0; - /* #of interior buckets: split [.lo_bucket, .hi_bucket] into - * equally-spaced intervals of width (.hi_bucket - .lo_bucket) / .n_bucket - */ - uint32_t n_interior_bucket_ = 0; - /* right edge of first bucket (left edge is -oo) */ - double lo_bucket_ = 0.0; - /* left edge of last bucket (right edge is +oo) */ - double hi_bucket_ = 0.0; - - /* hisogram buckets */ - std::vector bucket_v_; - }; /*Histogram*/ - } /*namespace statistics*/ -} /*namespace xo*/ - -/* end Histogram.hpp */ diff --git a/.xo-statistics/include/xo/statistics/SampleStatistics.hpp b/.xo-statistics/include/xo/statistics/SampleStatistics.hpp deleted file mode 100644 index a7d595b0..00000000 --- a/.xo-statistics/include/xo/statistics/SampleStatistics.hpp +++ /dev/null @@ -1,130 +0,0 @@ -/* @file SampleStatistics.hpp */ - -#pragma once - -#include - -namespace xo { - namespace statistics { - /* accumlate statistics online for a sample */ - class SampleStatistics { - public: - SampleStatistics() = default; - - /* given we have a sample S(n) of size n with given mean, - * compute mean of sample with one event x added - * - * n. #of samples *preceding* x - */ - static double update_online_mean(double x, uint32_t n, double mean) { - /* to update mean in a numerically stable way: - * avoid computing running sample sum, to avoid - * adding floating point numbers with distant magnitudes; - * instead compute correction to the mean directly - * - * n / x(i) \ - * mean(Sn) := Sum | ----- | - * i=1 \ n / - * - * so - * n+1 / x(i) \ - * mean(S(n+1)) = Sum | ----- | - * i=1 \ n+1 / - * - * n n+1 / x(i) \ - * = --- Sum | ----- | - * n+1 i=1 \ n / - * - * n / x(n+1) n x(i) \ - * = --- | ------ + Sum ---- | - * n+1 \ n i=1 n / - * - * x(n+1) / n \ - * = ------ + | --- . mean(S(n)) | - * n+1 \ n+1 / - * - * x(n+1) / -1 \ - * = ------ + mean(S(n)) + | --- . mean(S(n)) | - * n+1 \ n+1 / - * - * = mean(S(n)) + (x(n+1) - mean(S(n))) / (n+1) - */ - return mean + ((1.0 / (n+1)) * (x - mean)); - } /*update_online_mean*/ - - /* - * with S(n) = Sn = {set of n samples}, - * u(n) = mean(Sn) - * - * (with mean, variance meaning "estimate for") - * - * 1 n / 2 \ / 1 \ 2 - * variance(Sn) := --- . Sum | (x(i) | - | --- . Sum x(i) | - * n i=1 \ / \ n i=1 / - * - * using Welford's recurrence for 2nd moment: - * - * define - * M2(n+1) := M2(n) + (x(n+1) - mean(S(n))) - * . (x(n+1) - mean(S(n+1)) - * - * then unbiased variance estimate for S(n+1) is: - * - * M2(n+1) - * ------- - * n - * - * x. new sample value - * mean_np1. mean estimate for S(n+1) - * mean_n. mean estimate for S(n) - * moment2. 2nd moment for S(n) - */ - static double update_online_moment2(double x, - double mean_np1, double mean_n, - double moment2) - { - return moment2 + (x - mean_n) * (x - mean_np1); - } /*update_online_moment2*/ - - uint32_t n_sample() const { return n_sample_; } - double mean() const { return mean_; } - double moment2() const { return moment2_; } - /* 'sample variance' = variance estimate, - * applying Bessel correction for sample bias - * - * require: n_sample >= 2 - */ - double sample_variance() const { return moment2_ / (n_sample_ - 1); } - - /* biased variance estimate - * = (1 - 1/(n+1)) * .sample_variance() - * - * .variance() -> .sample_variance() as sample size -> +oo - * - * require: n_sample >= 1 - */ - double variance() const { return moment2_ / n_sample_; } - - void include_sample(double x) { - /* n+1 */ - uint32_t np1 = this->n_sample_ + 1; - - double mean_np1 = update_online_mean(x, this->n_sample_, this->mean_); - double moment2_np1 = update_online_moment2(x, this->mean_, mean_np1, this->moment2_); - - this->n_sample_ = np1; - this->mean_ = mean_np1; - this->moment2_ = moment2_np1; - } /*include_sample*/ - - private: - uint32_t n_sample_ = 0; - /* estimated mean */ - double mean_ = 0.0; - /* estimated 2nd moment E[X^2] */ - double moment2_ = 0.0; - }; /*SampleStatistics*/ - } /*namespace statistics*/ -} /*namespace xo*/ - -/* end SampleStatistics.hpp */