Add 'xo-distribution/' from commit '036ca5d817'

git-subtree-dir: xo-distribution git-subtree-mainline: 4e022df686 git-subtree-split: 036ca5d817
2025-05-11 15:52:36 -05:00 · 2025-05-11 15:52:36 -05:00 · ecd019e8bb
commit ecd019e8bb
parent 4e022df686 036ca5d817
18 changed files with 1644 additions and 0 deletions
--- a/xo-distribution/.gitignore
+++ b/xo-distribution/.gitignore
@ -0,0 +1,6 @@
+# clangd working space (see emacs+lsp)
+.cache
+# typical cmake build directory (source-tree-nephew)
+.build*
+# symlink to builddir/compile_commands.json;  should be set manually in dev sandbox
+compile_commands.json
--- a/xo-distribution/CMakeLists.txt
+++ b/xo-distribution/CMakeLists.txt
@ -0,0 +1,16 @@
+# xo-distribution/CMakeLists.txt
+
+cmake_minimum_required(VERSION 3.10)
+
+project(xo_distribution VERSION 1.0)
+
+include(GNUInstallDirs)
+include(cmake/xo-bootstrap-macros.cmake)
+
+xo_cxx_toplevel_options3()
+
+#add_subdirectory(example)
+add_subdirectory(src/distribution)  # note refcnt dep -> not header-only
+add_subdirectory(utest)
+
+xo_export_cmake_config(${PROJECT_NAME} ${PROJECT_VERSION} ${PROJECT_NAME}Targets)
--- a/xo-distribution/cmake/xo-bootstrap-macros.cmake
+++ b/xo-distribution/cmake/xo-bootstrap-macros.cmake
@ -0,0 +1,35 @@
+# ----------------------------------------------------------------
+# for example:
+#   $ PREFIX=/usr/local   # for example
+#   $ cmake -DCMAKE_MODULE_PATH=prefix -DCMAKE_INSTALL_PREFIX=$PREFIX -B .build
+#
+# will get
+#   CMAKE_MODULE_PATH
+# from xo-cmake-config --cmake-module-path
+#
+# and expect .cmake macros in
+#   CMAKE_MODULE_PATH/xo_macros/xo_cxx.cmake
+# ----------------------------------------------------------------
+
+find_program(XO_CMAKE_CONFIG_EXECUTABLE NAMES xo-cmake-config REQUIRED)
+
+if ("${XO_CMAKE_CONFIG_EXECUTABLE}" STREQUAL "XO_CMAKE_CONFIG_EXECUTABLE-NOT_FOUND")
+    message(FATAL "could not find xo-cmake-config executable")
+endif()
+
+message(STATUS "XO_CMAKE_CONFIG_EXECUTABLE=${XO_CMAKE_CONFIG_EXECUTABLE}")
+
+if (NOT XO_SUBMODULE_BUILD)
+    if (("${CMAKE_MODULE_PATH}" STREQUAL "") OR ("${CMAKE_MODULE_PATH}" STREQUAL prefix))
+        # default to typical install location for xo-project-macros
+        execute_process(COMMAND ${XO_CMAKE_CONFIG_EXECUTABLE} --cmake-module-path OUTPUT_VARIABLE CMAKE_MODULE_PATH)
+        message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}")
+    endif()
+endif()
+
+# needs to have been installed somewhere on CMAKE_MODULE_PATH,
+# (e.g. from xo-cmake with the same value for CMAKE_INSTALL_PREFIX)
+#
+include(xo_macros/xo_cxx)
+
+xo_cxx_bootstrap_message()
--- a/xo-distribution/cmake/xo_distributionConfig.cmake.in
+++ b/xo-distribution/cmake/xo_distributionConfig.cmake.in
@ -0,0 +1,6 @@
+@PACKAGE_INIT@
+
+include(CMakeFindDependencyMacro)
+find_dependency(refcnt)
+include("${CMAKE_CURRENT_LIST_DIR}/xo_distributionTargets.cmake")
+check_required_components("@PROJECT_NAME@")
--- a/xo-distribution/include/xo/distribution/Distribution.hpp
+++ b/xo-distribution/include/xo/distribution/Distribution.hpp
@ -0,0 +1,20 @@
+/* @file Distribution.hpp */
+
+#pragma once
+
+#include "xo/refcnt/Refcounted.hpp"
+
+namespace xo {
+    namespace distribution {
+        /* abstract api for a cumulative probability distribution.
+         * over supplied Domain
+         */
+        template<typename Domain>
+        class Distribution : public ref::Refcount {
+        public:
+            virtual double cdf(Domain const & x) const = 0;
+        }; /*Distribution*/
+    } /*namespace distribution*/
+} /*namespace xo*/
+
+/* end Distribution.hpp */
--- a/xo-distribution/include/xo/distribution/Empirical.hpp
+++ b/xo-distribution/include/xo/distribution/Empirical.hpp
@ -0,0 +1,41 @@
+/* @file Empirical.hpp */
+
+#pragma once
+
+#include "xo/distribution/Distribution.hpp"
+#include "xo/ordinaltree/RedBlackTree.hpp"
+#include "xo/indentlog/scope.hpp"
+#include <map>
+#include <cstdint>
+
+namespace xo {
+    namespace distribution {
+
+        /* representation for counter,
+         * recording #of samples with the same value
+         */
+        using CounterRep = uint32_t;
+
+        /* counter;  for use with StdEmpirical distribution below
+         */
+        class Counter {
+        public:
+            Counter() = default;
+            Counter(CounterRep n) : count_(n) {}
+
+            CounterRep count() const { return count_; }
+
+            void incr() { ++count_; }
+
+            operator CounterRep () const { return count_; }
+
+            Counter & operator+=(CounterRep n) { count_ += n; return *this; }
+
+        private:
+            CounterRep count_ = 0;
+        }; /*Counter*/
+
+    } /*namespace disitribution*/
+} /*namespace xo*/
+
+/* end Empirical.hpp */
--- a/xo-distribution/include/xo/distribution/ExplicitDist.hpp
+++ b/xo-distribution/include/xo/distribution/ExplicitDist.hpp
@ -0,0 +1,748 @@
+/* file ExplicitDist.hpp
+ *
+ * author: Roland Conybeare, Oct 2022
+ */
+
+#pragma once
+
+#include "Distribution.hpp"
+#include "Normal.hpp"
+#include "xo/indentlog/scope.hpp"
+#include "xo/indentlog/print/vector.hpp"
+#include "xo/indentlog/print/tostr.hpp"
+#include <vector>
+#include <cmath>
+#include <cstdint>
+
+namespace xo {
+    using xo::xtag;
+
+    namespace distribution {
+        class ProbabilityBucket {
+        public:
+            ProbabilityBucket() = default;
+
+            double weight() const { return wt_; }
+            double cdf() const { return cdf_; }
+
+            /* note: when calling this, must invalidate ExplicitDist.cdf_valid_flag */
+            void scale_weight(double k) { this->wt_ *= k; }
+
+            void assign_weight(double w) { this->wt_ = w; }
+            /* implementation method: only ExplicitDist.renormalize() should call this */
+            void assign_cdf(double x) { this->cdf_ = x; }
+
+            void display(std::ostream & os) const {
+                using xo::xtag;
+
+                os << "<ProbabilityBucket"
+                   << xtag("wt", this->wt_)
+                   << xtag("cdf", this->cdf_)
+                   << ">";
+            } /*display*/
+
+        private:
+            /* probability assigned to this bucket.
+             * updated in-place by ExplicitDist.include_sample()
+             */
+            double wt_ = 0.0;
+            /* cumulative probability assigned to this bucket b
+             * and all buckets b[i] for domain values < b
+             *
+             * invalidated by ExplicitDist.scale_bucket()
+             * see ExplicitDist.
+             */
+            double cdf_ = 0.0;
+        }; /*ProbabilityBucket*/
+
+        inline std::ostream & operator<<(std::ostream & os, ProbabilityBucket const & x) {
+            x.display(os);
+            return os;
+        } /*operator<<*/
+
+        namespace detail {
+            /* three kinds of index here:
+             * 1. lo:     non-negative index into ExplicitDist.lo_v_[]
+             * 2. hi:     non-negative index into ExplicitDist.hi_v_[]
+             * 3. signed: if >=0: index into ExplicitDist.hi_v_[]
+             *            if  <0: index into ExplicitDist.lo_v_[]
+             *
+             * with lz=.lo_v.size(), hz=hi_v.size():
+             *
+             *                            .hi_v[0]     .hi_v[hz-1]
+             *                          | v            v
+             *                          |
+             *        +--+--+     +--+--|--+--+      +--+
+             *        |  |  | ... |  |  |  |  |  ... |  |
+             *        +--+--+     +--+--|--+--+      +--+
+             *          ^              ^
+             *          .lo_v[lz-1]    .lo_v[0]
+             *
+             *          ^              ^  ^            ^
+             * signed -lz             -1| 0      ...   hz-1
+             */
+            class ExplicitDistIndexUtil {
+            public:
+                static size_t signed2hi(int32_t signed_ix) { return signed_ix; }
+                static size_t signed2lo(int32_t signed_ix) { return -signed_ix - 1; }
+                static int32_t lo2signed(size_t lo_ix) { return -static_cast<int32_t>(lo_ix)-1; }
+                static int32_t hi2signed(size_t hi_ix) { return hi_ix; }
+
+                static ProbabilityBucket const * signed2bucket_c(int32_t signed_ix,
+                                                                 std::vector<ProbabilityBucket> const * lo_v,
+                                                                 std::vector<ProbabilityBucket> const * hi_v) {
+                    if (signed_ix >= 0) {
+                        size_t hi_ix = signed2hi(signed_ix);
+
+                        if (hi_v && (hi_ix < hi_v->size()))
+                            return &(*hi_v)[hi_ix];
+                    } else {
+                        size_t lo_ix = signed2lo(signed_ix);
+
+                        if (lo_v && (lo_ix < lo_v->size()))
+                            return &(*lo_v)[lo_ix];
+                    }
+
+                    return nullptr;
+                } /*signed2bucket_c*/
+
+                static ProbabilityBucket * signed2bucket(int32_t signed_ix,
+                                                         std::vector<ProbabilityBucket> * lo_v,
+                                                         std::vector<ProbabilityBucket> * hi_v) {
+                    ProbabilityBucket const * b = signed2bucket_c(signed_ix, lo_v, hi_v);
+                    return const_cast<ProbabilityBucket *>(b);
+                } /*signed2bucket*/
+
+            }; /*ExplicitDistIndexUtil*/
+
+            class ExplicitDistIterator : public ExplicitDistIndexUtil {
+            public:
+                ExplicitDistIterator() = default;
+                ExplicitDistIterator(int32_t signed_ix,
+                                     std::vector<ProbabilityBucket> * lo_v,
+                                     std::vector<ProbabilityBucket> * hi_v)
+                    : signed_ix_{signed_ix}, p_lo_v_{lo_v}, p_hi_v_{hi_v} {}
+
+                ProbabilityBucket & operator*() {
+                    ProbabilityBucket * pb = signed2bucket(this->signed_ix_,
+                                                           this->p_lo_v_,
+                                                           this->p_hi_v_);
+                    if (!pb) {
+                        throw std::runtime_error("ExplicitDistIterator: attempt to deref invalid iterator");
+                    }
+
+                    return *pb;
+                } /*operator**/
+
+                ExplicitDistIterator & operator++() { ++(this->signed_ix_); return *this; }
+                ExplicitDistIterator & operator--() { --(this->signed_ix_); return *this; }
+
+            private:
+                /* signed iterator */
+                int32_t signed_ix_ = 0;
+                /* will be ExplicitDist.lo_v[] */
+                std::vector<ProbabilityBucket> * p_lo_v_ = nullptr;
+                /* will be ExplicitDist.hi_v[] */
+                std::vector<ProbabilityBucket> * p_hi_v_ = nullptr;
+            }; /*ExplicitDistIterator*/
+        } /*namespace detail*/
+
+        /* explicit distribution with buckets.
+         * (if you want to avoid bucketing at the expense of memory,
+         *  see StdEmpirical)
+         *
+         * sample-independent buckets can be faster to the extent
+         *    {#of buckets} << {#of samples}
+         *
+         * in particular
+         *                                 | StdEmpirical | PackedEmpirical
+         *   ------------------------------+--------------+-----------------
+         *   update existing sample/bucket |   O(log(n))  |    O(1)
+         *   create new sample/bucket      |   O(log(n))  |    O(log(n))
+         *   cdf                           |   O(log(n))  |    O(n)
+         *
+         * PackedEmpirical offers scaling + renormalization
+         *
+         * since .cdf() is slow,  .ks_stat_1sided() is supported only in StdEmpirical.
+         * (could generalize in future to some other implementation with fast .cdf())
+         *
+         * Require:
+         *   Domain is something with metric,  e.g. int|double.
+         *   categorical domain not supported here.
+         *
+         * see also: statistics/Histogram.  Histogram keeps more per-bucket statistics
+         */
+        template<typename Domain>
+        class ExplicitDist : public Distribution<Domain>, public detail::ExplicitDistIndexUtil {
+        public:
+            using WeightVector = std::vector<ProbabilityBucket>;
+            using iterator = detail::ExplicitDistIterator;
+
+        public:
+            static rp<ExplicitDist> make(Domain bucket_dx, Domain ref_value) {
+                return new ExplicitDist(bucket_dx, ref_value);
+            }
+            /* create distribution with n buckets of width bucket_dx,
+             * covering range [ref_value, ref_value + n * bucket_dx]
+             */
+            static rp<ExplicitDist> make_n(size_t n, Domain bucket_dx, Domain ref_value) {
+                return new ExplicitDist(n, bucket_dx, ref_value);
+            }
+
+            size_t n_bucket() const { return this->lo_v_.size() + this->hi_v_.size(); }
+
+            /* lub domain value with .cdf(lo) = 0 */
+            Domain lo() const {
+                std::size_t lz = this->lo_v_.size();
+
+                return this->ref_value_ - lz * this->bucket_dx_;
+            } /*lo*/
+
+            /* glb domain value with .cdf(hi) = 1 */
+            Domain hi() const {
+                std::size_t hz = this->hi_v_.size();
+
+                return this->ref_value_ + hz * this->bucket_dx_;
+            } /*hi*/
+
+            Domain bucket_lo(int32_t signed_ix) const {
+                return this->ref_value_ + signed_ix * this->bucket_dx_;
+            } /*bucket_lo*/
+
+            Domain bucket_mid(int32_t signed_ix) const {
+                return this->ref_value_ + (0.5 + signed_ix) * this->bucket_dx_;
+            } /*bucket_mid*/
+
+            Domain bucket_hi(int32_t signed_ix) const {
+                return this->bucket_lo(signed_ix + 1);
+            } /*bucket_hi*/
+
+            iterator begin() { return iterator(lo2signed(this->lo_v_.size() - 1),
+                                               &(this->lo_v_),
+                                               &(this->hi_v_)); }
+            iterator end() { return iterator(hi2signed(this->hi_v_.size()),
+                                             &(this->lo_v_),
+                                             &(this->hi_v_)); }
+
+            /* probability density at domain value x */
+            double density(Domain const & x) const {
+                /* careful!  may need to renormalize */
+                {
+                    ExplicitDist<Domain> * self = const_cast<ExplicitDist<Domain> *>(this);
+
+                    self->check_renormalize();
+                }
+
+                auto v = this->lookup_bucket(x);
+                ProbabilityBucket const * b = v.first;
+
+                if (b) {
+                    /* probability density is constant within each bucket */
+                    return b->weight() / this->bucket_dx_;
+                } else {
+                    return 0.0;
+                }
+            } /*density*/
+
+            /* pair {bucket glb, density} for all buckets,  in increasing domain order */
+            std::vector<std::pair<Domain, double>> density_v() const {
+                /* careful! may need to renormalize */
+                {
+                    ExplicitDist<Domain> * self = const_cast<ExplicitDist<Domain> *>(this);
+
+                    self->check_renormalize();
+                }
+
+                size_t n = this->n_bucket();
+                std::vector<std::pair<Domain, double>> retval(n);
+
+                double w2d = 1.0 / this->bucket_dx_;
+
+                for(size_t i=0; i<n; ++i) {
+                    Domain lo = this->bucket_lo(i);
+                    ProbabilityBucket const * b = this->lookup_signed_bucket(i);
+
+                    assert(b);
+
+                    double density = b->weight() * w2d;
+
+                    retval[i] = std::make_pair(lo, density);
+                }
+
+                return retval;
+            } /*density_v*/
+
+            /* return:
+             *   .first   signed bucket index ix;
+             *            refers to .lo_v[1-ix] if ix<0;
+             *            refers to .hi_v[+ix] if ix>=0
+             *   .second  fractional weight within bucket associated with x.
+             *            not scaled by ProbabilityWeight.weight
+             */
+            std::pair<int32_t, double> signed_bucket_index(Domain const & x) const {
+                double ix_f = ::floor((x - this->ref_value_) / this->bucket_dx_);
+                int32_t ix = ix_f;
+
+                /*
+                 *           ^
+                 *           |............
+                 *           |    :      |
+                 *           | wt :      | unit area
+                 *           |    :      |
+                 *           +-----------+
+                 *   bucket_lo    ^    bucket_lo + .bucket_dx
+                 *                x
+                 */
+
+                Domain bucket_lo = this->ref_value_ + (ix * this->bucket_dx_);
+                double wt = (x - bucket_lo) / this->bucket_dx_;
+
+                return std::make_pair(ix, wt);
+            } /*signed_bucket_index*/
+
+            ProbabilityBucket const * lookup_signed_bucket(int32_t signed_ix) const {
+                return signed2bucket_c(signed_ix, &(this->lo_v_), &(this->hi_v_));
+            } /*lookup_signed_bucket*/
+
+            /* non-const version with write access */
+            ProbabilityBucket * lookup_signed_bucket(int32_t signed_ix) {
+                return signed2bucket(signed_ix, &(this->lo_v_), &(this->hi_v_));
+            } /*lookup_signed_bucket*/
+
+            /* return null pointer if bucket that would contain x not represented
+             *   .second is fractional weight (relative to unity) within selected bucket
+             */
+            std::pair<ProbabilityBucket const *, double> lookup_bucket(Domain const & x) const {
+                /* signed index.
+                 *   ix>=0 => .lo_v[]
+                 *   ix< 0 => .hi_v[]
+                 */
+                std::pair<int32_t, double> v = this->signed_bucket_index(x);
+
+                int32_t ix = v.first;
+                double fraction_wt = v.second;
+                ProbabilityBucket const * pw = this->lookup_signed_bucket(ix);
+
+                return std::make_pair(pw, fraction_wt);
+            } /*lookup_bucket*/
+
+            /* non-const version */
+            std::pair<ProbabilityBucket *, double> lookup_bucket(Domain const & x) {
+                ExplicitDist<Domain> const * c_self = this;
+
+                std::pair<ProbabilityBucket const *, double> v = c_self->lookup_bucket(x);
+                ProbabilityBucket * b = const_cast<ProbabilityBucket *>(v.first);
+                double fraction_wt = v.second;
+
+                return std::make_pair(b, fraction_wt);
+            } /*lookup_bucket*/
+
+            /* like .lookup_bucket(),  but create bucket if not already present
+             * .second reports fractional weight (relative to unity) within bucket
+             * that contains x.
+             */
+            std::pair<ProbabilityBucket*, double> establish_bucket(Domain const & x) {
+                /* signed index.
+                 *   ix>=0 => .lo_v[]
+                 *   ix< 0 => .hi_v[]
+                 */
+                std::pair<int32_t, double> v = this->signed_bucket_index(x);
+
+                int32_t ix = v.first;
+                double fraction_wt = v.second;
+                ProbabilityBucket * pw = nullptr;
+
+                if (ix >= 0) {
+                    size_t hi_ix = signed2hi(ix);
+
+                    if (hi_ix >= this->hi_v_.size()) {
+                        /* need to expand .hi_v[] */
+                        this->hi_v_.resize(hi_ix+1);
+                    }
+
+                    pw = &(this->hi_v_[hi_ix]);
+                } else {
+                    size_t lo_ix = signed2lo(ix);
+
+                    if (lo_ix >= this->lo_v_.size()) {
+                        /* need to expand .lo_v[] */
+                        this->lo_v_.resize(lo_ix+1);
+                    }
+
+                    pw = &(this->lo_v_[lo_ix]);
+                }
+
+                return std::make_pair(pw, fraction_wt);
+            } /*establish_bucket*/
+
+            void scale_bucket_by_signed_index(int32_t signed_ix, double k) {
+                ProbabilityBucket * b = this->lookup_signed_bucket(signed_ix);
+
+                if (b) {
+                    b->scale_weight(k);
+
+                    this->cdf_valid_flag_ = false;
+                } else {
+                    /* if bucket isn't represented,  then corresponding weight is zero,
+                     * both before and after scaling.  In this special case
+                     * representation didn't change -> don't invalidate cdf
+                     */
+                }
+            } /*scale_bucket_by_signed_index*/
+
+            /* scale probability assigned to bucket for domain value x, by factor k */
+            void scale_bucket(Domain const & x, double k) {
+                assert(k >= 0.0);
+
+                std::pair<int32_t, double> v = this->signed_bucket_index(x);
+
+                this->scale_bucket_by_signed_index(v.first, k);
+            } /*scale_bucket*/
+
+            /* scale probability weight for buckets in [lo, hi] by fn(p)
+             * for a point p in each bucket,  however:
+             * under no circumstances try to evaluate fn() outside [lo, hi]
+             * (relevant if either lo/hi fall inside a bucket)
+             */
+            template<typename Function>
+            void scale_interval(Domain const & lo,
+                                Domain const & hi,
+                                Function && fn)
+                {
+                    XO_SCOPE_DISABLED(lscope);
+
+                    /* note: using inclusive upper index bounds here;
+                     *       variying from idiomatic c++ style for symmetry
+                     */
+
+                    int32_t min_bucket_ix = lo2signed(this->lo_v_.size() - 1);
+                    int32_t max_bucket_ix = hi2signed(this->hi_v_.size() - 1);
+
+                    int32_t lo_ix = this->signed_bucket_index(lo).first;
+                    int32_t hi_ix = this->signed_bucket_index(hi).first;
+
+                    /* start_ix: lowest explicit bucket associating with [lo, hi) */
+                    int32_t start_ix = std::max(min_bucket_ix, lo_ix + 1);
+                    /* end_ix: highest explicit bucket associating with [lo, hi) */
+                    int32_t end_ix = std::min(max_bucket_ix, hi_ix);
+
+                    if (lscope.enabled()) {
+                        lscope.log(xtag("min_bucket_ix", min_bucket_ix),
+                                   xtag("max_bucket_ix", max_bucket_ix),
+                                   xtag("lo_ix", lo_ix),
+                                   xtag("hi_ix", hi_ix),
+                                   xtag("start_ix", start_ix),
+                                   xtag("end_ix", end_ix));
+                    }
+
+                    /* for endpoints:  avoid evaluating fn() outside [lo, hi] */
+                    if (min_bucket_ix <= lo_ix) {
+                        double lo_k = fn(lo);
+
+                        if (lscope.enabled())
+                            lscope.log("A", xtag("lo_ix", lo_ix), xtag("lo_k", lo_k));
+
+                        this->scale_bucket_by_signed_index(lo_ix, lo_k);
+                    }
+
+                    for(int32_t ix = start_ix; ix <= end_ix; ++ix) {
+                        double k = fn(this->bucket_mid(ix));
+
+                        if (lscope.enabled())
+                            lscope.log("B", xtag("ix", ix), xtag("k", k));
+
+                        this->scale_bucket_by_signed_index(ix, k);
+                    }
+
+                    /* for endpoints: avoid evaluating fn() outside [lo, hi] */
+                    if (hi_ix <= max_bucket_ix) {
+                        double hi_k = fn(hi);
+
+                        if (lscope.enabled())
+                            lscope.log("C", xtag("hi_ix", hi_ix), xtag("hi_k", hi_k));
+
+                        this->scale_bucket_by_signed_index(hi_ix, hi_k);
+                    }
+                } /*scale_interval*/
+
+            template<typename Function>
+            void scale_all(Function && fn)
+                {
+                    this->scale_interval(this->lo(), this->hi(), fn);
+                } /*scale_all*/
+
+            /* convenience: scale by scaled normal cdf.
+             *
+             * support use case where:
+             * 1. explicit dist represents distribution of asset value;
+             * 2. assume one party A to trade/order has noisy signal,
+             *    with normally-distributed error around (unknown) true value s,
+             *    with variance o^2;
+             * 3. observe a trade/order at some price p,
+             *    giving us one-sided information about A's knowledge in two scenarios:
+             *    i.  A trades
+             *    ii. A does not trade
+             *
+             * We will be applying Bayes' rule to update prior.
+             * If buy(p) = {A buys},
+             *   N(x) is cumulative normal distribution:
+             *     N(x) -> 0 as x -> -oo;
+             *     N(x) -> 1 as x -> +oo;
+             *
+             *                        -1/2        2
+             *     (d/dx)N(x) = (2.pi)    . exp(-x /2)
+             * then:
+             *                     P{s=s'}
+             *   P{s=s'|buy(p)} = ---------.P{buy(p)|s=s'}
+             *                    P{buy(p)}
+             *
+             *                     P{s=s'}
+             *                  = ---------.N[(1/o)(s'-p)],
+             *                    P{buy(p)}
+             *
+             * In this scenario, P{s=s'} is our prior,  represented by distribution *this.
+             * The unconditional probability P{buy(p)} is not a function of s',  so:
+             *
+             *                /
+             *                |
+             *    P{buy(p)} = | P{s=s'} . N[(1/o)(s'-p)] . ds'
+             *                |
+             *                /
+             *
+             * in other words can scale explicit prior *this by N[(1/o)(s'-p)] then renormalize
+             * so total probability weight is 1.
+             *
+             * Conversely,  if event is {A sells}, similar argument leads to scaling
+             * by N[(1/o)(p-s')]
+             *
+             * sign.  scale by N[(1/sigma).sign.(x-mean)]
+             */
+            void scale_by_normal_cdf(int sign, Domain const & mean, Domain const & sigma) {
+                auto fn([sign, mean, sigma](Domain const & s) {
+                    return Normal::cdf_impl(sign * (s - mean) / sigma);
+                });
+
+                this->scale_all(fn);
+            } /*scale_by_normal_cdf*/
+
+            // ----- inherited from Distribution<Domain> -----
+
+            /* note: marked const;  actually "logically const" */
+            virtual double cdf(Domain const & x) const override {
+                /* .cdf() is slow here,  because partial sums aren't stored
+                 * --> have to sum over O(n) buckets
+                 */
+                {
+                    ExplicitDist<Domain> * self = const_cast<ExplicitDist<Domain> *>(this);
+
+                    self->check_renormalize();
+                }
+
+                std::pair<ProbabilityBucket const *, double> v = this->lookup_bucket(x);
+
+                ProbabilityBucket const * b = v.first;
+                double fraction_wt = v.second;
+
+                if (b) {
+                    /* for bucket that x belongs to,   treat as uniformly distributed
+                     *   fraction_wt = 1.0 -> 100% of b -> b.cdf()
+                     *   fraction_wt = 0.0 ->   0% of b -> b.cdf() - b.weight()
+                     */
+                    return b->cdf() + (fraction_wt - 1.0) * b->weight();
+                } else {
+                    if (x >= this->ref_value_) {
+                        /* x falls above farthest .hi_v[] */
+                        return 1.0;
+                    } else {
+                        /* x falls below farthest .lo_v[] */
+                        return 0.0;
+                    }
+                }
+            } /*cdf*/
+
+            /* O(n).  restores .cdf_valid_flag */
+            void renormalize() {
+                /* [1] compute sum of probability weights
+                 * [2] rescale all buckets so sum is 1.
+                 * [3] restore .cdf_valid_flag
+                 */
+
+                /* [1] */
+                double lo_sum_prob = 0.0;
+                for (ProbabilityBucket & b : this->lo_v_) {
+                    assert(lo_sum_prob >= 0.0);
+                    lo_sum_prob += b.weight();
+                }
+
+                double hi_sum_prob = 0.0;
+                for (ProbabilityBucket & b : this->hi_v_) {
+                    assert(hi_sum_prob >= 0.0);
+                    hi_sum_prob += b.weight();
+                }
+
+                assert(lo_sum_prob + hi_sum_prob > 0.0);
+
+                /* [2] */
+                double renorm_factor = 1.0 / (lo_sum_prob + hi_sum_prob);
+
+                {
+                    double lo_tail = lo_sum_prob;
+
+                    /* iterate over buckets < .ref_value,  in /descending/ domain order */
+                    for (ProbabilityBucket & b : this->lo_v_) {
+                        b.scale_weight(renorm_factor);
+                        b.assign_cdf(lo_tail);
+
+                        /* reduce tail after assign cdf to b,  since
+                         * lo_tail includes b's probability weight
+                         */
+                        lo_tail -= b.weight();
+
+                        /* numerical roundoff can cause this */
+                        if (lo_tail < 0.0)
+                            lo_tail = 0.0;
+                    }
+                }
+
+                {
+                    double cum_prob = lo_sum_prob;
+
+                    /* iterate over buckets >= .refvalue,  in /ascending/ domain order */
+                    for (ProbabilityBucket & b : this->hi_v_) {
+                        b.scale_weight(renorm_factor);
+
+                        /* increase cum_prob before assign cdf to b,
+                         * since b.cdf should include b's probability weight
+                         */
+                        cum_prob += b.weight();
+
+                        /* numerical roundoff can cause this */
+                        if (cum_prob >= 1.0)
+                            cum_prob = 1.0;
+
+                        b.assign_cdf(cum_prob);
+                    }
+                }
+
+                /* [3] */
+                this->cdf_valid_flag_ = true;
+            } /*renormalize*/
+
+            void check_renormalize() {
+                if (!this->cdf_valid_flag_) {
+                    this->renormalize();
+                }
+            } /*check_renormalize*/
+
+            void display(std::ostream & os) const {
+                os << "<ExplicitDist";
+                os << xtag("cdf_valid_flag", this->cdf_valid_flag_);
+                os << xtag("bucket_dx", this->bucket_dx_);
+                os << xtag("ref_value", this->ref_value_);
+                os << xtag("lz", this->lo_v_.size());
+                os << xtag("hz", this->hi_v_.size());
+                os << xtag("lo_v", this->lo_v_);
+                os << xtag("hi_v", this->hi_v_);
+                os << ">";
+            } /*display*/
+
+            std::string display_string() const { return xo::tostr(*this); }
+
+        private:
+            ExplicitDist(Domain bucket_dx, Domain ref_value)
+                : cdf_valid_flag_{true},
+                  bucket_dx_{bucket_dx},
+                  ref_value_{ref_value},
+                  lo_v_{},
+                  hi_v_{1}
+                {
+                    assert(bucket_dx_ > 0.0);
+
+                    /* must have at least one bucket,  since need total probability weight = 1 */
+                    this->hi_v_[0].assign_weight(1.0);
+                    this->hi_v_[0].assign_cdf(1.0);
+                }
+
+            ExplicitDist(size_t n, Domain bucket_dx, Domain ref_value)
+                : cdf_valid_flag_{true},
+                  bucket_dx_{bucket_dx},
+                  ref_value_{ref_value},
+                  lo_v_{},
+                  hi_v_{n}
+                {
+                    assert(bucket_dx > 0.0);
+                    /* must have at least one bucket,  since need total probability weight = 1 */
+                    assert(n > 0);
+
+                    double w = 1.0 / n;
+
+                    for(size_t i = 0; i<n; ++i) {
+                        this->hi_v_[i].assign_weight(w);
+                        this->hi_v_[i].assign_cdf((i+1)*w);
+                    }
+                } /*ctor*/
+
+        private:
+            /* with lz=.lo_v.size(), hz=hi_v.size():
+             *
+             *    .ref_value - .bucket_dx * lz
+             *    |                 .ref_value
+             *    |                 | .hi_v[0]     .hi_v[hz-1]
+             *    v                 v v            v
+             *                      |
+             *    +--+--+     +--+--|--+--+      +--+
+             *    |  |  | ... |  |  |  |  |  ... |  |
+             *    +--+--+     +--+--|--+--+      +--+
+             *                      |
+             *      ^              ^                ^
+             *      .lo_v[lz-1]    .lo_v[0]         .ref_value + .bucket_dx * hz
+             *
+             */
+
+            /* .cdf_valid_flag:
+             *   -> false whenever .scale_bucket() runs.
+             *   -> true whenever .renormalize() runs.
+             *
+             * if false,  then *this is 'not a probability distribution':
+             *   Sum b[i].weight != 1.0   (summing over probability buckets .lo_v[], .hi_v[])
+             *    i
+             */
+            bool cdf_valid_flag_ = true;
+
+            /* width of each bucket */
+            Domain bucket_dx_;
+
+            /* natural value here would be 0.
+             * use .pos_v[] for values >= .ref_value
+             * use .neg_v[] for values <  .ref_value
+             */
+            Domain ref_value_;
+
+            /* buckets for domain values < .ref_value
+             *
+             * with dx = .bucket_dx
+             * .lo_v[i] represents weights in range
+             *   [.ref_value - (i+1) * dx, .ref_value - i * dx)
+             */
+            WeightVector lo_v_;
+
+            /* buckets for domain values > .ref_value
+             *
+             * with dx = .bucket_dx
+             * .hi_v[i] represents weights in range
+             *   [.ref_value + i * dx, .ref_value + (i+1) * dx))
+             */
+            WeightVector hi_v_;
+
+        }; /*ExplicitDist*/
+
+        template<typename Domain>
+        inline std::ostream &
+        operator<<(std::ostream & os, ExplicitDist<Domain> const & x) {
+            x.display(os);
+            return os;
+        } /*operator<<*/
+    } /*namespace distribution*/
+} /*namespace xo*/
+
+/* end ExplicitDist.hpp */
--- a/xo-distribution/include/xo/distribution/Exponential.hpp
+++ b/xo-distribution/include/xo/distribution/Exponential.hpp
@ -0,0 +1,94 @@
+/* @file Exponential.hpp */
+
+#pragma once
+
+#include "xo/distribution/Distribution.hpp"
+#include <limits>
+#include <cmath>
+
+namespace xo {
+    namespace distribution {
+        /* Exponential probability distribution */
+        class Exponential : public Distribution<double> {
+        public:
+            explicit Exponential(double lm) : lambda_(lm) {}
+
+            /* exponential probability density:
+             *
+             *                    -lm.x
+             *    p(x) = {  lm . e      ,  x > 0
+             *           {  0           ,  x <= 0
+             *
+             */
+            static double density_impl(double lambda, double x) {
+                if(x <= 0.0)
+                    return 0.0;
+
+                return lambda * ::exp(-lambda * x);
+            } /*density_impl*/
+
+            /* exponential distribution:
+             *
+             *                -lm.x
+             *    F(x) = 1 - e         , x > 0
+             *    F(x) = 0             , x <= 0
+             */
+            static double distr_impl(double lambda, double x) {
+                if(x <= 0.0)
+                    return 0.0;
+
+                return 1.0 - ::exp(-lambda * x);
+            } /*distr_impl*/
+
+            /* compute x: F(x)=y,  where F(x)
+             * is the cumulative exponential probability distributio.
+             *
+             *                -lm.x
+             *    F(x) = 1 - e        , x>0
+             *
+             *     -lm.x
+             *    e      = 1 - F(x)
+             *
+             *    -lm.x  = ln(1 - F(x))
+             *
+             *    x      = -ln(1 - F(x)) / lm,   0 < F(x) < 1
+             */
+            static double distr_inverse_impl(double lambda, double Fx) {
+                if(Fx < 0.0)
+                    return -1.0 * std::numeric_limits<double>::infinity();
+                if(Fx >= 1.0)
+                    return +1.0 * std::numeric_limits<double>::infinity();
+
+                return (-1.0 / lambda) *  ::log(1.0 - Fx);
+            } /*distr_inverse_impl*/
+
+            double lambda() const { return lambda_; }
+
+            double density(double x) const {
+                return density_impl(this->lambda_, x);
+            } /*density*/
+
+            double distribution(double x) const {
+                return distr_impl(this->lambda_, x);
+            } /*distribution*/
+
+            double distribution_inverse(double y) const {
+                return distr_inverse_impl(this->lambda_, y);
+            } /*distribution_inverse*/
+
+            // ----- inherited from Distribution<double> -----
+
+            virtual double cdf(double const & x) const override {
+                return distribution(x);
+            } /*cdf*/
+
+        private:
+            /* intensity parameter.
+             * require: lambda > 0
+             */
+            double lambda_ = 1.0;
+        }; /*Exponential*/
+    } /*namespace distribution*/
+} /*namespace xo*/
+
+/* end Exponential.cpp */
--- a/xo-distribution/include/xo/distribution/KolmogorovSmirnov.hpp
+++ b/xo-distribution/include/xo/distribution/KolmogorovSmirnov.hpp
@ -0,0 +1,257 @@
+/* @file KolmogorovSmirnov.hpp */
+
+#pragma once
+
+#include "Distribution.hpp"
+#include "xo/indentlog/scope.hpp"
+#include <cmath>
+#include <cstdint>
+
+namespace xo {
+    /* Kolmogorov-Smirnov probability distribution */
+    namespace distribution {
+
+        class KolmogorovSmirnov : public Distribution<double> {
+        public:
+            KolmogorovSmirnov() = default;
+
+            /* kolmogorov-smirnov (KS) cumulative distribution
+             *
+             * cdf is defined by the series:
+             *
+             *                 +oo /     j          2  2  \
+             *   P1(x) = 1 + 2 Sum | (-1)  .exp(-2.j .x ) |
+             *                 j=1 \                      /
+             *
+             * this converges rapidly for x > 1.  expanding the first few terms:
+             *
+             *                  /          2            2  2            2  2            2  2  \
+             *   P1(x) ~= 1 + 2.| -exp(-2.x ) + exp(-2.2 .x ) - exp(-2.3 .x ) + exp(-2.4 .x ) |
+             *                  \                                                            /
+             *
+             *                  /          2            2 4          2 9           2 16 \
+             *          = 1 + 2.| -exp(-2.x ) + exp(-2.x )  - exp(2.x )  + exp(-2.x )   |
+             *                  \                                                      /
+             *
+             *                  /            4       9       16 \
+             *          = 1 + 2.| T(x) + T(x)  + T(x)  + T(x)   |
+             *                  \                               /
+             *
+             * with T(x) = .term_aux(1, x)
+             *
+             * with x=1:
+             *   term1_aux(1, 1): -0.135335
+             *   term1_aux(2, 1):  3.35463e-4
+             *   term1_aux(3, 1): -1.52300e-8
+             *   term1_aux(4, 1):  1.26642e-14
+             *   term1_aux(5, 1): -1.92875e-22
+             *
+             * with x=1.18:
+             *   term1_aux(1, 1): -0.0617414
+             *   term1_aux(2, 1):  1.45314e-5
+             *   term1_aux(3, 1): -1.30374e-11
+             *   term1_aux(4, 1):  4.45890e-20
+             *   term1_aux(5, 1): -5.83124e-31
+             *
+             * ----------------------------------------------------------------
+             *
+             * There's an alternative series for the KS distribution,
+             * that converges rapidly for small x (x < ~1.18)
+             *
+             *                                  /           2   2 \
+             *           sqrt(2.pi)   +oo /     |   (2j - 1) .pi  | \
+             *   P2(x) = ---------- . Sum | exp | - ------------  | |
+             *               x        j=1 \     |          2      | /
+             *                                  \       8.x       /
+             *
+             *                 /     2  \
+             *                 |   pi   |
+             * witu U(x) = exp | - ---- |
+             *                 |      2 |
+             *                 \   8.x  /
+             * we have
+             *                            /             2 \
+             *           sqrt(2.pi)   +oo |     (2j - 1)  |
+             *   P2(x) = ---------- . Sum | U(x)          |
+             *               x        j=1 |               |
+             *                            \               /
+             *
+             *                        /                                   \
+             *           sqrt(2.pi)   |            9       25      49     |
+             *         = ---------- . | U(x) + U(x)  + U(x)  + U(x)  + .. |
+             *               x        |                                   |
+             *                        \                                   /
+             *
+             * with x=1.0:
+             *   term2_aux(1, 1): 0.291213
+             *   term2_aux(2, 1): 1.50625e-5
+             *   term2_aux(3, 1): 4.02964e-14
+             *   term2_aux(4, 1): 5.57599e-27
+             *
+             * with x=1.18:
+             *   term2_aux(1, 1.18): 0.412292
+             *   term2_aux(2, 1.18): 3.44223e-4
+             *   term2_aux(3, 1.18): 2.39945e-10
+             *   term2_aux(4, 1.18): 1.39652e-19
+             */
+            static double term1_aux(uint32_t j, double x) {
+                double f = ::exp(-2.0 * x * x);
+                /* sgn:
+                 *  +1 for j in {2, 4, 6, ..}
+                 *  -1 for j in {1, 3, 5, ..}
+                 */
+                int32_t sgn = (((j & 0x1) == 0) ? +1 : -1);
+
+                if(j == 1) {
+                    return sgn * f;
+                } else {
+                    return sgn * ::pow(f, j*j);
+                }
+            } /*term1_aux*/
+
+            /* implements P1(x) above;  truncating at 4 terms.
+             * use .distr1() for x > ~1.18
+             */
+            static double distr1_impl(double x) {
+                double   f = term1_aux(1.0, x);
+                double  f2 = f*f;   /*f^2*/
+                double  f4 = f2*f2; /*f^4*/
+                double  f8 = f4*f4; /*f^8*/
+                double  f9 = f8*f;  /*f^9*/
+                double f16 = f8*f8; /*f^16*/
+
+                double   r = ((f16 + f9) + f4) + f;
+
+                return 1.0 + 2.0*r;
+            } /*distr1_impl*/
+
+            /* pi: 3.141592... */
+            static constexpr double c_pi = M_PI;
+            /* pi^2 / 8 */
+            static constexpr double c_pi2_8 = 0.125 * c_pi * c_pi;
+
+            /* computes
+             *
+             *       (2j-1)^2
+             *   U(x)
+             *
+             * require:
+             * - j >= 1
+             */
+            static double term2_aux(uint32_t j, double x) {
+                double u = ::exp(-c_pi2_8 / (x * x));
+
+                if(j == 1) {
+                    return u;
+                } else {
+                    uint32_t j2m1 = 2*j - 1;
+
+                    return ::pow(u, j2m1 * j2m1);
+                }
+            } /*term2_aux*/
+
+            /* implements P2(x) above; truncating at 4 terms */
+            static double distr2_impl(double x) {
+                static double c_sqrt_2pi
+                    = ::sqrt(2.0 * c_pi);
+
+                double scale = c_sqrt_2pi / x;
+
+                double  u = term2_aux(1, x);
+                double u2 = u*u;        /*u^2*/
+                double u4 = u2*u2;      /*u^4*/
+                double u8 = u4*u4;      /*u^8*/
+                double u16 = u8*u8;     /*u^16*/
+                double u32 = u16*u16;   /*u^32*/
+
+                double  u9 = u8*u;      /*u^9*/
+                double u25 = u16*u8*u;  /*u^25*/
+                double u49 = u32*u16*u; /*u^49*/
+
+                double r = ((u49 + u25) + u9) + u;
+
+                return scale * r;
+
+            } /*distr2_impl*/
+
+            static double distr_impl(double x) {
+                using xo::tostr;
+
+                constexpr char const * c_self = "KolmogorovSmirnov::distr_impl";
+
+                if(x < 0.0)
+                    throw std::runtime_error(tostr(c_self, "KS(x) cdf defined for x>=0"));
+
+                if(x == 0.0)
+                    return 0;
+
+                if(x < 1.18) {
+                    /* P2(x) converges fastest */
+                    return distr2_impl(x);
+                } else {
+                    /* P1(x) converges fastest */
+                    return distr1_impl(x);
+                }
+            } /*distr_impl*/
+
+            /* p-value for significance of a particular value of the KS-statistic
+             * obtain this statistic for a sample distribution using
+             *   Empirical.ks_stat_1sided(dexp)
+             * for some target distribution dexp
+             *
+             * ne.
+             *   for 1-sided test:  #of points in sample, i.e. Empirical.n_sample()
+             *   for 2-sided test:  (n1 . n2) / (n1 + n2)
+             * D.
+             *   max difference between observed and expected cumulative distributions.
+             *   see Empirical.ks_stat_1sided()
+             */
+            static double ks_pvalue(uint32_t ne, double D)
+                {
+                    using xo::scope;
+                    using xo::xtag;
+
+                    constexpr bool logging_enabled_flag = false;
+
+                    scope log(XO_DEBUG(logging_enabled_flag));
+
+                    double ne_sqrt = ::sqrt(ne);
+
+                    /* argument to KS-distribution.
+                     *   x in [0, +oo)
+                     *
+                     * A large value for x -> small value for 1 - KSdist(x),
+                     * i.e. represents probability that a sample of size ne with a
+                     * KS-statistic of magnitude D or larger,  could be drawn from
+                     * distribution dexp.
+                     */
+                    double x = ne_sqrt + 0.12 + (0.11 / ne_sqrt);
+
+                    double xD = x * D;
+
+                    double pvalue = 1.0 - distr_impl(xD);
+
+                    log && log(xtag("ne", ne),
+                               xtag("D", D),
+                               xtag("ne_sqrt", ne_sqrt),
+                               xtag("x", x),
+                               xtag("xD", xD),
+                               xtag("pvalue", pvalue));
+
+                    return pvalue;
+                } /*ks_pvalue*/
+
+            /* cumulative distribution function */
+            double distribution(double x) const {
+                return distr_impl(x);
+            } /*distribution*/
+
+            // ----- inherited from Distribution<double> -----
+            virtual double cdf(double const & x) const {
+                return this->distribution(x);
+            } /*cdf*/
+        }; /*KolmogorovSmirnov*/
+    } /*namespace distribution*/
+} /*namespace xo*/
+
+/* end KolmogorovSmirnov.hpp */
--- a/xo-distribution/include/xo/distribution/Normal.hpp
+++ b/xo-distribution/include/xo/distribution/Normal.hpp
@ -0,0 +1,54 @@
+/* @file Normal.hpp */
+
+#pragma once
+
+#include "Distribution.hpp"
+#include <cmath>
+
+namespace xo {
+    namespace distribution {
+        /* the guassian distribution,  with mean 0 and variance 1
+         */
+        class Normal : public Distribution<double> {
+        public:
+            Normal() = default;
+
+            /* N(0,1):  mean 0, sdev 1 */
+            static rp<Normal> unit() { return new Normal(); }
+
+            /* normal probability density:
+             *
+             *                  x^2
+             *            -(1/2)              1/2
+             *    p(x) = e            / (2.pi)
+             */
+            static double density(double x) {
+                static double c_sqrt_2pi = ::sqrt(2 * M_PI);
+
+                return ::exp(-0.5 * x * x) / c_sqrt_2pi;
+            } /*density*/
+
+            /* cumulative distribution function for N(0,1):
+             *
+             *   / x
+             *   |
+             *   |    p(x).dx
+             *   |
+             *   / -oo
+             *
+             * where p(x) is the normal density function p(x) = e^[-x^2/2]
+             */
+            static double cdf_impl(double x) {
+                return 0.5 * std::erfc(-M_SQRT1_2 * x);
+            } /*cdf_impl*/
+
+            // ----- inherited from Distribution<double> -----
+
+            virtual double cdf(double const & x) const override {
+                return cdf_impl(x);
+            } /*cdf*/
+        }; /*Normal*/
+    } /*namespace distribution*/
+} /*namespace xo*/
+
+/* end Normal.hpp */
--- a/xo-distribution/include/xo/distribution/StdEmpirical.hpp
+++ b/xo-distribution/include/xo/distribution/StdEmpirical.hpp
@ -0,0 +1,226 @@
+/* @file StdEmpirical.hpp */
+
+#pragma once
+
+#include "Empirical.hpp"
+#include "xo/ordinaltree/RedBlackTree.hpp"
+#include "xo/indentlog/scope.hpp"
+#include <map>
+#include <cstdint>
+
+namespace xo {
+    namespace distribution {
+        /* an empirical distribution over a given domain
+         * (e.g. double as proxy for IR),
+         * obtained by sorting equally-weighted samples
+         */
+        template<typename Domain>
+        class StdEmpirical : public Distribution<Domain> {
+        public:
+            using SampleMap = xo::tree::RedBlackTree<Domain,
+                                                     Counter,
+                                                     xo::tree::SumReduce<uint32_t>>;
+            using const_iterator = typename SampleMap::const_iterator;
+
+        public:
+            StdEmpirical() = default;
+
+            uint32_t n_sample() const { return n_sample_; }
+            const_iterator begin() const { return sample_map_.begin(); }
+            const_iterator end() const { return sample_map_.end(); }
+
+            /* compute kolmogorov-smirnov statistic with a non-sampled distribution.
+             * if d2 is sampled,  should use .ks_stat_2sided() instead
+             */
+            std::pair<double, double> ks_stat_1sided(Distribution<Domain> const & d2) const {
+                using xo::scope;
+                using xo::xtag;
+
+                constexpr char const * c_self = "Empirical::ks_stat_1sided";
+                constexpr bool c_logging_enabled = false;
+
+                scope lscope(c_self, c_logging_enabled);
+
+                double ks_stat = 0.0;
+
+                /* for i'th loop iteration below:
+                 *   xj_sum = sum of all x[j] with j<=i
+                 */
+                uint32_t xj_sum = 0;
+
+                /* #of sample in this distribution,  as double */
+                double nr = 1.0 / this->n_sample();
+
+                /* loop over elements x[i] of this (sampled) distribution,
+                 * compare cdf(x[i]) with d2.cdf(x[i])
+                 *
+                 * KS stat is the maximum observed difference.
+                 */
+                for(auto const & point : this->sample_map_) {
+                    Domain const & xi = point.first;
+                    uint32_t xi_count = point.second;
+
+                    xj_sum += xi_count;
+
+                    /* p1 = xi_sum / n1,  where n1 = .n_sample() */
+                    double p1 = xj_sum * nr;
+                    double p2 = d2.cdf(xi);
+
+                    double dp = std::abs(p1 - p2);
+
+                    if(c_logging_enabled)
+                        lscope.log(c_self,
+                                   xtag("xi", xi),
+                                   xtag("xi_count", xi_count),
+                                   xtag("xj_sum", xj_sum),
+                                   xtag("p1", p1),
+                                   xtag("p2", p2),
+                                   xtag("dp", dp));
+
+                    ks_stat = std::max(ks_stat, dp);
+                }
+
+                return std::pair<double, double>(this->n_sample(), ks_stat);
+            } /*ks_stat_1sided*/
+
+            /* compute kolmogorov-smirnov statistic with a sampled distribution;
+             * assess likelihood that both samples come from the same population.
+             */
+            std::pair<double, double> ks_stat_2sided(StdEmpirical<Domain> const & d2) {
+                /* loop once over both sample distributions;
+                 * algorithm is O(n1 + n2) for two empirical
+                 * distributions with n1,n2 points respectively
+                 */
+
+                /* return value observed here */
+                double ks_stat = 0.0;
+
+                auto     ix1 = this->sample_map_.begin();
+                auto end_ix1 = this->sample_map_.end();
+
+                auto     ix2 = d2.sample_map_.begin();
+                auto end_ix2 = this->sample_map_.end();
+
+                uint32_t xj1_sum = 0;
+                uint32_t xj2_sum = 0;
+
+                uint32_t n1 = this->n_sample();
+                uint32_t n2 = d2.n_sample();
+
+                double nr1 = 1.0 / n1;
+                double nr2 = 1.0 / n2;
+
+                /*  ^
+                 * 1|                   **.
+                 *  |                ...*..
+                 *  |                .  *
+                 *  |          **********
+                 *  |          *     .
+                 *  |        .........
+                 *  |        . *
+                 *  |   ********
+                 *  | ..*.....
+                 *  +----------------------->
+                 *           ^ ^
+                 *         ix1 ix2
+                 */
+
+                while ((ix1 != end_ix1) || (ix2 != end_ix2)) {
+                    /* on each iteration,  compare sample distributions
+                     * at smallest of (ix1->first, ix2->first)
+                     */
+
+                    bool advance_ix1_flag = false;
+                    bool advance_ix2_flag = false;
+
+                    if (ix1 == end_ix1) {
+                        /* only ix2 dereferenceable */
+                        advance_ix2_flag = true;
+                    } else if (ix2 == end_ix2) {
+                        /* only ix1 dereferenceable */
+                        advance_ix1_flag = true;
+                    } else {
+                        /* ix1,ix2 both dereferenceable */
+
+                        advance_ix1_flag = (ix1->first <= ix2->first);
+                        advance_ix2_flag = (ix2->first <= ix1->first);
+                    }
+
+                    if (advance_ix1_flag) {
+                        xj1_sum += ix1->first;
+                        ++ix1;
+                    }
+                    if (advance_ix2_flag) {
+                        xj2_sum += ix2->first;
+                        ++ix2;
+                    }
+
+                    double p1 = xj1_sum * nr1;
+                    double p2 = xj2_sum * nr2;
+
+                    double dp = std::abs(p1 - p2);
+
+                    ks_stat = std::max(ks_stat, dp);
+                }
+
+                /* ne = effective #of points to use when comparing 2 sample dist/s */
+                double ne = (n1 * n2) / static_cast<double>(n1 + n2);
+
+                return std::pair<double, double>(ne, ks_stat);
+
+#ifdef OBSOLETE
+                /* NOTE: this will cost O(n.log(n))
+                 *       (for empirical distributions with n points)
+                 *       if we loop over both sets of points in parallel,
+                 *       can get O(n) solution
+                 *
+                 */
+
+                /* loop over points in *this */
+                double ks1 = this->ks_stat_1sided(d2);
+                /* loop over points in d2 */
+                double ks2 = d2.ks_stat_1sided(*this);
+
+                return std::max(ks1, ks2);
+#endif
+            } /*ks_stat_2sided*/
+
+            // ----- inherited from Empirical<Domain> -----
+
+            /* introduce one new sample into this distribution */
+            virtual void include_sample(Domain const & x) {
+                ++(this->n_sample_);
+
+                /* note: xo::tree::RedBlackTree doesn't provide the usual reference result
+                 *       from operator[];  it needs to intervene after assignment to update
+                 *       order statistics
+                 */
+                auto lhs = this->sample_map_[x];
+
+                lhs += 1;
+            } /*include_sample*/
+
+            // ----- inherited from Distribution<Domain> -----
+
+            virtual double cdf(Domain const & x) const override {
+                /* computes #of samples with values <= x */
+                uint32_t nx = this->sample_map_.reduce_lub(x, true /*is_closed*/);
+                size_t n = this->n_sample();
+
+                return static_cast<double>(nx) / n;
+            } /*cdf*/
+
+        private:
+            /* count #of calls to .include_sample() */
+            CounterRep n_sample_ = 0;
+
+            /* .sample_map_[x] counts the #of times
+             * .include_sample(x) has been called.
+             */
+            SampleMap sample_map_;
+        }; /*StdEmpirical*/
+
+    } /*namespace distribution*/
+} /*namespace xo*/
+
+/* end StdEmpirical.hpp */
--- a/xo-distribution/include/xo/distribution/Uniform.hpp
+++ b/xo-distribution/include/xo/distribution/Uniform.hpp
@ -0,0 +1,57 @@
+/* @file Uniform.hpp */
+
+#pragma once
+
+#include "Distribution.hpp"
+
+namespace xo {
+    namespace distribution {
+        /* uniform distribution on an interval */
+        class Uniform : public Distribution<double> {
+        public:
+            Uniform(double lo, double hi) : lo_(lo), hi_(hi) {}
+
+            static rp<Uniform> unit() { return new Uniform(0.0, 1.0); }
+
+            static double density_impl(double lo, double hi, double x) {
+                if (x <= lo)
+                    return 0.0;
+                if (x >= hi)
+                    return 0.0;
+
+                return 1.0 / (hi - lo);
+            } /*density_impl*/
+
+            static double distr_impl(double lo, double hi, double x) {
+                if (x <= lo)
+                    return 0.0;
+                if (x >= hi)
+                    return 1.0;
+
+                return (x - lo) / (hi - lo);
+            } /*distr_impl*/
+
+            double lo() const { return lo_; }
+            double hi() const { return hi_; }
+
+            double density(double x) const {
+                return density_impl(this->lo_, this->hi_, x);
+            } /*density*/
+
+            double distribution(double x) const {
+                return distr_impl(this->lo_, this->hi_, x);
+            } /*distribution*/
+
+            // ----- inherited from Distribution<double> -----
+
+            virtual double cdf(double const & x) const override {
+                return this->distribution(x);
+            } /*cdf*/
+
+        private:
+            /* Invariant: .lo < .hi */
+            double lo_ = 0.0;
+            double hi_ = 1.0;
+        }; /*Uniform*/
+    } /*namespace distribution*/
+} /*namespace xo*/
--- a/xo-distribution/src/distribution/CMakeLists.txt
+++ b/xo-distribution/src/distribution/CMakeLists.txt
@ -0,0 +1,5 @@
+set(SELF_LIB xo_distribution)
+set(SELF_SRCS Normal.cpp)
+
+xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS})
+xo_dependency(${SELF_LIB} refcnt)
--- a/xo-distribution/src/distribution/Normal.cpp
+++ b/xo-distribution/src/distribution/Normal.cpp
@ -0,0 +1,9 @@
+/* @file Normal.cpp */
+
+#include "Normal.hpp"
+
+namespace xo {
+
+} /*namespace xo*/
+
+/* end Normal.cpp */
--- a/xo-distribution/utest/CMakeLists.txt
+++ b/xo-distribution/utest/CMakeLists.txt
@ -0,0 +1,13 @@
+# build unittest distribution/utest
+
+set(SELF_EXE utest.distribution)
+set(SELF_SRCS
+    distribution_utest_main.cpp
+    Normal.test.cpp
+    Uniform.test.cpp)
+
+xo_add_utest_executable(${SELF_EXE} ${SELF_SRCS})
+xo_self_dependency(${SELF_EXE} xo_distribution)
+xo_external_target_dependency(${SELF_EXE} Catch2 Catch2::Catch2)
+
+# end CMakeLists.txt
--- a/xo-distribution/utest/Normal.test.cpp
+++ b/xo-distribution/utest/Normal.test.cpp
@ -0,0 +1,24 @@
+/* @file Normal.test.cpp */
+
+#include "xo/distribution/Normal.hpp"
+#include <catch2/catch.hpp>
+
+namespace xo {
+    using xo::distribution::Normal;
+
+    namespace ut {
+        TEST_CASE("normal", "[distribution]") {
+            auto n01 = Normal::unit();
+
+            CHECK(n01->cdf(-3.0) == Approx(0.001349898).margin(1e-9));
+            CHECK(n01->cdf(-2.0) == Approx(0.0227501319).margin(1e-9));
+            CHECK(n01->cdf(-1.0) == Approx(0.1586552539).margin(1e-9));
+            CHECK(n01->cdf(0.0) == 0.5);
+            CHECK(n01->cdf(1.0) == 1.0 - n01->cdf(-1.0));
+            CHECK(n01->cdf(2.0) == 1.0 - n01->cdf(-2.0));
+            CHECK(n01->cdf(3.0) == 1.0 - n01->cdf(-3.0));
+        } /*TEST_CASE(normal)*/
+    } /*namespace ut*/
+} /*namespace xo*/
+
+/* end Normal.test.cpp */
--- a/xo-distribution/utest/Uniform.test.cpp
+++ b/xo-distribution/utest/Uniform.test.cpp
@ -0,0 +1,27 @@
+/* @file Uniform.test.cpp */
+
+#include "xo/distribution/Uniform.hpp"
+#include <catch2/catch.hpp>
+
+namespace xo {
+    using xo::distribution::Uniform;
+
+    namespace ut {
+        TEST_CASE("uniform", "[distribution]") {
+            auto u = Uniform::unit();
+
+            CHECK(u->cdf(-3.0) == 0.0);
+            CHECK(u->cdf(-2.0) == 0.0);
+            CHECK(u->cdf(-1.0) == 0.0);
+            CHECK(u->cdf(0.0) == 0.0);
+            CHECK(u->cdf(0.05) == 0.05);
+            CHECK(u->cdf(0.5) == 0.5);
+            CHECK(u->cdf(0.95) == 0.95);
+            CHECK(u->cdf(1.0) == 1.0);
+            CHECK(u->cdf(2.0) == 1.0);
+            CHECK(u->cdf(3.0) == 1.0);
+        } /*TEST_CASE(uniform)*/
+    } /*namespace ut*/
+} /*namespace xo*/
+
+/* end Uniform.test.cpp */
--- a/xo-distribution/utest/distribution_utest_main.cpp
+++ b/xo-distribution/utest/distribution_utest_main.cpp
@ -0,0 +1,6 @@
+/* file distribution_utest_main.cpp */
+
+#define CATCH_CONFIG_MAIN
+#include "catch2/catch.hpp"
+
+/* end distribution_utest_main.cpp */