diff --git a/xo-distribution/.gitignore b/xo-distribution/.gitignore new file mode 100644 index 00000000..13c0afb7 --- /dev/null +++ b/xo-distribution/.gitignore @@ -0,0 +1,6 @@ +# clangd working space (see emacs+lsp) +.cache +# typical cmake build directory (source-tree-nephew) +.build* +# symlink to builddir/compile_commands.json; should be set manually in dev sandbox +compile_commands.json diff --git a/xo-distribution/CMakeLists.txt b/xo-distribution/CMakeLists.txt new file mode 100644 index 00000000..12f76055 --- /dev/null +++ b/xo-distribution/CMakeLists.txt @@ -0,0 +1,16 @@ +# xo-distribution/CMakeLists.txt + +cmake_minimum_required(VERSION 3.10) + +project(xo_distribution VERSION 1.0) + +include(GNUInstallDirs) +include(cmake/xo-bootstrap-macros.cmake) + +xo_cxx_toplevel_options3() + +#add_subdirectory(example) +add_subdirectory(src/distribution) # note refcnt dep -> not header-only +add_subdirectory(utest) + +xo_export_cmake_config(${PROJECT_NAME} ${PROJECT_VERSION} ${PROJECT_NAME}Targets) diff --git a/xo-distribution/cmake/xo-bootstrap-macros.cmake b/xo-distribution/cmake/xo-bootstrap-macros.cmake new file mode 100644 index 00000000..aba31169 --- /dev/null +++ b/xo-distribution/cmake/xo-bootstrap-macros.cmake @@ -0,0 +1,35 @@ +# ---------------------------------------------------------------- +# for example: +# $ PREFIX=/usr/local # for example +# $ cmake -DCMAKE_MODULE_PATH=prefix -DCMAKE_INSTALL_PREFIX=$PREFIX -B .build +# +# will get +# CMAKE_MODULE_PATH +# from xo-cmake-config --cmake-module-path +# +# and expect .cmake macros in +# CMAKE_MODULE_PATH/xo_macros/xo_cxx.cmake +# ---------------------------------------------------------------- + +find_program(XO_CMAKE_CONFIG_EXECUTABLE NAMES xo-cmake-config REQUIRED) + +if ("${XO_CMAKE_CONFIG_EXECUTABLE}" STREQUAL "XO_CMAKE_CONFIG_EXECUTABLE-NOT_FOUND") + message(FATAL "could not find xo-cmake-config executable") +endif() + +message(STATUS "XO_CMAKE_CONFIG_EXECUTABLE=${XO_CMAKE_CONFIG_EXECUTABLE}") + +if (NOT XO_SUBMODULE_BUILD) + if (("${CMAKE_MODULE_PATH}" STREQUAL "") OR ("${CMAKE_MODULE_PATH}" STREQUAL prefix)) + # default to typical install location for xo-project-macros + execute_process(COMMAND ${XO_CMAKE_CONFIG_EXECUTABLE} --cmake-module-path OUTPUT_VARIABLE CMAKE_MODULE_PATH) + message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}") + endif() +endif() + +# needs to have been installed somewhere on CMAKE_MODULE_PATH, +# (e.g. from xo-cmake with the same value for CMAKE_INSTALL_PREFIX) +# +include(xo_macros/xo_cxx) + +xo_cxx_bootstrap_message() diff --git a/xo-distribution/cmake/xo_distributionConfig.cmake.in b/xo-distribution/cmake/xo_distributionConfig.cmake.in new file mode 100644 index 00000000..d906d196 --- /dev/null +++ b/xo-distribution/cmake/xo_distributionConfig.cmake.in @@ -0,0 +1,6 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) +find_dependency(refcnt) +include("${CMAKE_CURRENT_LIST_DIR}/xo_distributionTargets.cmake") +check_required_components("@PROJECT_NAME@") diff --git a/xo-distribution/include/xo/distribution/Distribution.hpp b/xo-distribution/include/xo/distribution/Distribution.hpp new file mode 100644 index 00000000..5768e9d9 --- /dev/null +++ b/xo-distribution/include/xo/distribution/Distribution.hpp @@ -0,0 +1,20 @@ +/* @file Distribution.hpp */ + +#pragma once + +#include "xo/refcnt/Refcounted.hpp" + +namespace xo { + namespace distribution { + /* abstract api for a cumulative probability distribution. + * over supplied Domain + */ + template + class Distribution : public ref::Refcount { + public: + virtual double cdf(Domain const & x) const = 0; + }; /*Distribution*/ + } /*namespace distribution*/ +} /*namespace xo*/ + +/* end Distribution.hpp */ diff --git a/xo-distribution/include/xo/distribution/Empirical.hpp b/xo-distribution/include/xo/distribution/Empirical.hpp new file mode 100644 index 00000000..2b7b9d67 --- /dev/null +++ b/xo-distribution/include/xo/distribution/Empirical.hpp @@ -0,0 +1,41 @@ +/* @file Empirical.hpp */ + +#pragma once + +#include "xo/distribution/Distribution.hpp" +#include "xo/ordinaltree/RedBlackTree.hpp" +#include "xo/indentlog/scope.hpp" +#include +#include + +namespace xo { + namespace distribution { + + /* representation for counter, + * recording #of samples with the same value + */ + using CounterRep = uint32_t; + + /* counter; for use with StdEmpirical distribution below + */ + class Counter { + public: + Counter() = default; + Counter(CounterRep n) : count_(n) {} + + CounterRep count() const { return count_; } + + void incr() { ++count_; } + + operator CounterRep () const { return count_; } + + Counter & operator+=(CounterRep n) { count_ += n; return *this; } + + private: + CounterRep count_ = 0; + }; /*Counter*/ + + } /*namespace disitribution*/ +} /*namespace xo*/ + +/* end Empirical.hpp */ diff --git a/xo-distribution/include/xo/distribution/ExplicitDist.hpp b/xo-distribution/include/xo/distribution/ExplicitDist.hpp new file mode 100644 index 00000000..abb673bb --- /dev/null +++ b/xo-distribution/include/xo/distribution/ExplicitDist.hpp @@ -0,0 +1,748 @@ +/* file ExplicitDist.hpp + * + * author: Roland Conybeare, Oct 2022 + */ + +#pragma once + +#include "Distribution.hpp" +#include "Normal.hpp" +#include "xo/indentlog/scope.hpp" +#include "xo/indentlog/print/vector.hpp" +#include "xo/indentlog/print/tostr.hpp" +#include +#include +#include + +namespace xo { + using xo::xtag; + + namespace distribution { + class ProbabilityBucket { + public: + ProbabilityBucket() = default; + + double weight() const { return wt_; } + double cdf() const { return cdf_; } + + /* note: when calling this, must invalidate ExplicitDist.cdf_valid_flag */ + void scale_weight(double k) { this->wt_ *= k; } + + void assign_weight(double w) { this->wt_ = w; } + /* implementation method: only ExplicitDist.renormalize() should call this */ + void assign_cdf(double x) { this->cdf_ = x; } + + void display(std::ostream & os) const { + using xo::xtag; + + os << "wt_) + << xtag("cdf", this->cdf_) + << ">"; + } /*display*/ + + private: + /* probability assigned to this bucket. + * updated in-place by ExplicitDist.include_sample() + */ + double wt_ = 0.0; + /* cumulative probability assigned to this bucket b + * and all buckets b[i] for domain values < b + * + * invalidated by ExplicitDist.scale_bucket() + * see ExplicitDist. + */ + double cdf_ = 0.0; + }; /*ProbabilityBucket*/ + + inline std::ostream & operator<<(std::ostream & os, ProbabilityBucket const & x) { + x.display(os); + return os; + } /*operator<<*/ + + namespace detail { + /* three kinds of index here: + * 1. lo: non-negative index into ExplicitDist.lo_v_[] + * 2. hi: non-negative index into ExplicitDist.hi_v_[] + * 3. signed: if >=0: index into ExplicitDist.hi_v_[] + * if <0: index into ExplicitDist.lo_v_[] + * + * with lz=.lo_v.size(), hz=hi_v.size(): + * + * .hi_v[0] .hi_v[hz-1] + * | v v + * | + * +--+--+ +--+--|--+--+ +--+ + * | | | ... | | | | | ... | | + * +--+--+ +--+--|--+--+ +--+ + * ^ ^ + * .lo_v[lz-1] .lo_v[0] + * + * ^ ^ ^ ^ + * signed -lz -1| 0 ... hz-1 + */ + class ExplicitDistIndexUtil { + public: + static size_t signed2hi(int32_t signed_ix) { return signed_ix; } + static size_t signed2lo(int32_t signed_ix) { return -signed_ix - 1; } + static int32_t lo2signed(size_t lo_ix) { return -static_cast(lo_ix)-1; } + static int32_t hi2signed(size_t hi_ix) { return hi_ix; } + + static ProbabilityBucket const * signed2bucket_c(int32_t signed_ix, + std::vector const * lo_v, + std::vector const * hi_v) { + if (signed_ix >= 0) { + size_t hi_ix = signed2hi(signed_ix); + + if (hi_v && (hi_ix < hi_v->size())) + return &(*hi_v)[hi_ix]; + } else { + size_t lo_ix = signed2lo(signed_ix); + + if (lo_v && (lo_ix < lo_v->size())) + return &(*lo_v)[lo_ix]; + } + + return nullptr; + } /*signed2bucket_c*/ + + static ProbabilityBucket * signed2bucket(int32_t signed_ix, + std::vector * lo_v, + std::vector * hi_v) { + ProbabilityBucket const * b = signed2bucket_c(signed_ix, lo_v, hi_v); + return const_cast(b); + } /*signed2bucket*/ + + }; /*ExplicitDistIndexUtil*/ + + class ExplicitDistIterator : public ExplicitDistIndexUtil { + public: + ExplicitDistIterator() = default; + ExplicitDistIterator(int32_t signed_ix, + std::vector * lo_v, + std::vector * hi_v) + : signed_ix_{signed_ix}, p_lo_v_{lo_v}, p_hi_v_{hi_v} {} + + ProbabilityBucket & operator*() { + ProbabilityBucket * pb = signed2bucket(this->signed_ix_, + this->p_lo_v_, + this->p_hi_v_); + if (!pb) { + throw std::runtime_error("ExplicitDistIterator: attempt to deref invalid iterator"); + } + + return *pb; + } /*operator**/ + + ExplicitDistIterator & operator++() { ++(this->signed_ix_); return *this; } + ExplicitDistIterator & operator--() { --(this->signed_ix_); return *this; } + + private: + /* signed iterator */ + int32_t signed_ix_ = 0; + /* will be ExplicitDist.lo_v[] */ + std::vector * p_lo_v_ = nullptr; + /* will be ExplicitDist.hi_v[] */ + std::vector * p_hi_v_ = nullptr; + }; /*ExplicitDistIterator*/ + } /*namespace detail*/ + + /* explicit distribution with buckets. + * (if you want to avoid bucketing at the expense of memory, + * see StdEmpirical) + * + * sample-independent buckets can be faster to the extent + * {#of buckets} << {#of samples} + * + * in particular + * | StdEmpirical | PackedEmpirical + * ------------------------------+--------------+----------------- + * update existing sample/bucket | O(log(n)) | O(1) + * create new sample/bucket | O(log(n)) | O(log(n)) + * cdf | O(log(n)) | O(n) + * + * PackedEmpirical offers scaling + renormalization + * + * since .cdf() is slow, .ks_stat_1sided() is supported only in StdEmpirical. + * (could generalize in future to some other implementation with fast .cdf()) + * + * Require: + * Domain is something with metric, e.g. int|double. + * categorical domain not supported here. + * + * see also: statistics/Histogram. Histogram keeps more per-bucket statistics + */ + template + class ExplicitDist : public Distribution, public detail::ExplicitDistIndexUtil { + public: + using WeightVector = std::vector; + using iterator = detail::ExplicitDistIterator; + + public: + static rp make(Domain bucket_dx, Domain ref_value) { + return new ExplicitDist(bucket_dx, ref_value); + } + /* create distribution with n buckets of width bucket_dx, + * covering range [ref_value, ref_value + n * bucket_dx] + */ + static rp make_n(size_t n, Domain bucket_dx, Domain ref_value) { + return new ExplicitDist(n, bucket_dx, ref_value); + } + + size_t n_bucket() const { return this->lo_v_.size() + this->hi_v_.size(); } + + /* lub domain value with .cdf(lo) = 0 */ + Domain lo() const { + std::size_t lz = this->lo_v_.size(); + + return this->ref_value_ - lz * this->bucket_dx_; + } /*lo*/ + + /* glb domain value with .cdf(hi) = 1 */ + Domain hi() const { + std::size_t hz = this->hi_v_.size(); + + return this->ref_value_ + hz * this->bucket_dx_; + } /*hi*/ + + Domain bucket_lo(int32_t signed_ix) const { + return this->ref_value_ + signed_ix * this->bucket_dx_; + } /*bucket_lo*/ + + Domain bucket_mid(int32_t signed_ix) const { + return this->ref_value_ + (0.5 + signed_ix) * this->bucket_dx_; + } /*bucket_mid*/ + + Domain bucket_hi(int32_t signed_ix) const { + return this->bucket_lo(signed_ix + 1); + } /*bucket_hi*/ + + iterator begin() { return iterator(lo2signed(this->lo_v_.size() - 1), + &(this->lo_v_), + &(this->hi_v_)); } + iterator end() { return iterator(hi2signed(this->hi_v_.size()), + &(this->lo_v_), + &(this->hi_v_)); } + + /* probability density at domain value x */ + double density(Domain const & x) const { + /* careful! may need to renormalize */ + { + ExplicitDist * self = const_cast *>(this); + + self->check_renormalize(); + } + + auto v = this->lookup_bucket(x); + ProbabilityBucket const * b = v.first; + + if (b) { + /* probability density is constant within each bucket */ + return b->weight() / this->bucket_dx_; + } else { + return 0.0; + } + } /*density*/ + + /* pair {bucket glb, density} for all buckets, in increasing domain order */ + std::vector> density_v() const { + /* careful! may need to renormalize */ + { + ExplicitDist * self = const_cast *>(this); + + self->check_renormalize(); + } + + size_t n = this->n_bucket(); + std::vector> retval(n); + + double w2d = 1.0 / this->bucket_dx_; + + for(size_t i=0; ibucket_lo(i); + ProbabilityBucket const * b = this->lookup_signed_bucket(i); + + assert(b); + + double density = b->weight() * w2d; + + retval[i] = std::make_pair(lo, density); + } + + return retval; + } /*density_v*/ + + /* return: + * .first signed bucket index ix; + * refers to .lo_v[1-ix] if ix<0; + * refers to .hi_v[+ix] if ix>=0 + * .second fractional weight within bucket associated with x. + * not scaled by ProbabilityWeight.weight + */ + std::pair signed_bucket_index(Domain const & x) const { + double ix_f = ::floor((x - this->ref_value_) / this->bucket_dx_); + int32_t ix = ix_f; + + /* + * ^ + * |............ + * | : | + * | wt : | unit area + * | : | + * +-----------+ + * bucket_lo ^ bucket_lo + .bucket_dx + * x + */ + + Domain bucket_lo = this->ref_value_ + (ix * this->bucket_dx_); + double wt = (x - bucket_lo) / this->bucket_dx_; + + return std::make_pair(ix, wt); + } /*signed_bucket_index*/ + + ProbabilityBucket const * lookup_signed_bucket(int32_t signed_ix) const { + return signed2bucket_c(signed_ix, &(this->lo_v_), &(this->hi_v_)); + } /*lookup_signed_bucket*/ + + /* non-const version with write access */ + ProbabilityBucket * lookup_signed_bucket(int32_t signed_ix) { + return signed2bucket(signed_ix, &(this->lo_v_), &(this->hi_v_)); + } /*lookup_signed_bucket*/ + + /* return null pointer if bucket that would contain x not represented + * .second is fractional weight (relative to unity) within selected bucket + */ + std::pair lookup_bucket(Domain const & x) const { + /* signed index. + * ix>=0 => .lo_v[] + * ix< 0 => .hi_v[] + */ + std::pair v = this->signed_bucket_index(x); + + int32_t ix = v.first; + double fraction_wt = v.second; + ProbabilityBucket const * pw = this->lookup_signed_bucket(ix); + + return std::make_pair(pw, fraction_wt); + } /*lookup_bucket*/ + + /* non-const version */ + std::pair lookup_bucket(Domain const & x) { + ExplicitDist const * c_self = this; + + std::pair v = c_self->lookup_bucket(x); + ProbabilityBucket * b = const_cast(v.first); + double fraction_wt = v.second; + + return std::make_pair(b, fraction_wt); + } /*lookup_bucket*/ + + /* like .lookup_bucket(), but create bucket if not already present + * .second reports fractional weight (relative to unity) within bucket + * that contains x. + */ + std::pair establish_bucket(Domain const & x) { + /* signed index. + * ix>=0 => .lo_v[] + * ix< 0 => .hi_v[] + */ + std::pair v = this->signed_bucket_index(x); + + int32_t ix = v.first; + double fraction_wt = v.second; + ProbabilityBucket * pw = nullptr; + + if (ix >= 0) { + size_t hi_ix = signed2hi(ix); + + if (hi_ix >= this->hi_v_.size()) { + /* need to expand .hi_v[] */ + this->hi_v_.resize(hi_ix+1); + } + + pw = &(this->hi_v_[hi_ix]); + } else { + size_t lo_ix = signed2lo(ix); + + if (lo_ix >= this->lo_v_.size()) { + /* need to expand .lo_v[] */ + this->lo_v_.resize(lo_ix+1); + } + + pw = &(this->lo_v_[lo_ix]); + } + + return std::make_pair(pw, fraction_wt); + } /*establish_bucket*/ + + void scale_bucket_by_signed_index(int32_t signed_ix, double k) { + ProbabilityBucket * b = this->lookup_signed_bucket(signed_ix); + + if (b) { + b->scale_weight(k); + + this->cdf_valid_flag_ = false; + } else { + /* if bucket isn't represented, then corresponding weight is zero, + * both before and after scaling. In this special case + * representation didn't change -> don't invalidate cdf + */ + } + } /*scale_bucket_by_signed_index*/ + + /* scale probability assigned to bucket for domain value x, by factor k */ + void scale_bucket(Domain const & x, double k) { + assert(k >= 0.0); + + std::pair v = this->signed_bucket_index(x); + + this->scale_bucket_by_signed_index(v.first, k); + } /*scale_bucket*/ + + /* scale probability weight for buckets in [lo, hi] by fn(p) + * for a point p in each bucket, however: + * under no circumstances try to evaluate fn() outside [lo, hi] + * (relevant if either lo/hi fall inside a bucket) + */ + template + void scale_interval(Domain const & lo, + Domain const & hi, + Function && fn) + { + XO_SCOPE_DISABLED(lscope); + + /* note: using inclusive upper index bounds here; + * variying from idiomatic c++ style for symmetry + */ + + int32_t min_bucket_ix = lo2signed(this->lo_v_.size() - 1); + int32_t max_bucket_ix = hi2signed(this->hi_v_.size() - 1); + + int32_t lo_ix = this->signed_bucket_index(lo).first; + int32_t hi_ix = this->signed_bucket_index(hi).first; + + /* start_ix: lowest explicit bucket associating with [lo, hi) */ + int32_t start_ix = std::max(min_bucket_ix, lo_ix + 1); + /* end_ix: highest explicit bucket associating with [lo, hi) */ + int32_t end_ix = std::min(max_bucket_ix, hi_ix); + + if (lscope.enabled()) { + lscope.log(xtag("min_bucket_ix", min_bucket_ix), + xtag("max_bucket_ix", max_bucket_ix), + xtag("lo_ix", lo_ix), + xtag("hi_ix", hi_ix), + xtag("start_ix", start_ix), + xtag("end_ix", end_ix)); + } + + /* for endpoints: avoid evaluating fn() outside [lo, hi] */ + if (min_bucket_ix <= lo_ix) { + double lo_k = fn(lo); + + if (lscope.enabled()) + lscope.log("A", xtag("lo_ix", lo_ix), xtag("lo_k", lo_k)); + + this->scale_bucket_by_signed_index(lo_ix, lo_k); + } + + for(int32_t ix = start_ix; ix <= end_ix; ++ix) { + double k = fn(this->bucket_mid(ix)); + + if (lscope.enabled()) + lscope.log("B", xtag("ix", ix), xtag("k", k)); + + this->scale_bucket_by_signed_index(ix, k); + } + + /* for endpoints: avoid evaluating fn() outside [lo, hi] */ + if (hi_ix <= max_bucket_ix) { + double hi_k = fn(hi); + + if (lscope.enabled()) + lscope.log("C", xtag("hi_ix", hi_ix), xtag("hi_k", hi_k)); + + this->scale_bucket_by_signed_index(hi_ix, hi_k); + } + } /*scale_interval*/ + + template + void scale_all(Function && fn) + { + this->scale_interval(this->lo(), this->hi(), fn); + } /*scale_all*/ + + /* convenience: scale by scaled normal cdf. + * + * support use case where: + * 1. explicit dist represents distribution of asset value; + * 2. assume one party A to trade/order has noisy signal, + * with normally-distributed error around (unknown) true value s, + * with variance o^2; + * 3. observe a trade/order at some price p, + * giving us one-sided information about A's knowledge in two scenarios: + * i. A trades + * ii. A does not trade + * + * We will be applying Bayes' rule to update prior. + * If buy(p) = {A buys}, + * N(x) is cumulative normal distribution: + * N(x) -> 0 as x -> -oo; + * N(x) -> 1 as x -> +oo; + * + * -1/2 2 + * (d/dx)N(x) = (2.pi) . exp(-x /2) + * then: + * P{s=s'} + * P{s=s'|buy(p)} = ---------.P{buy(p)|s=s'} + * P{buy(p)} + * + * P{s=s'} + * = ---------.N[(1/o)(s'-p)], + * P{buy(p)} + * + * In this scenario, P{s=s'} is our prior, represented by distribution *this. + * The unconditional probability P{buy(p)} is not a function of s', so: + * + * / + * | + * P{buy(p)} = | P{s=s'} . N[(1/o)(s'-p)] . ds' + * | + * / + * + * in other words can scale explicit prior *this by N[(1/o)(s'-p)] then renormalize + * so total probability weight is 1. + * + * Conversely, if event is {A sells}, similar argument leads to scaling + * by N[(1/o)(p-s')] + * + * sign. scale by N[(1/sigma).sign.(x-mean)] + */ + void scale_by_normal_cdf(int sign, Domain const & mean, Domain const & sigma) { + auto fn([sign, mean, sigma](Domain const & s) { + return Normal::cdf_impl(sign * (s - mean) / sigma); + }); + + this->scale_all(fn); + } /*scale_by_normal_cdf*/ + + // ----- inherited from Distribution ----- + + /* note: marked const; actually "logically const" */ + virtual double cdf(Domain const & x) const override { + /* .cdf() is slow here, because partial sums aren't stored + * --> have to sum over O(n) buckets + */ + { + ExplicitDist * self = const_cast *>(this); + + self->check_renormalize(); + } + + std::pair v = this->lookup_bucket(x); + + ProbabilityBucket const * b = v.first; + double fraction_wt = v.second; + + if (b) { + /* for bucket that x belongs to, treat as uniformly distributed + * fraction_wt = 1.0 -> 100% of b -> b.cdf() + * fraction_wt = 0.0 -> 0% of b -> b.cdf() - b.weight() + */ + return b->cdf() + (fraction_wt - 1.0) * b->weight(); + } else { + if (x >= this->ref_value_) { + /* x falls above farthest .hi_v[] */ + return 1.0; + } else { + /* x falls below farthest .lo_v[] */ + return 0.0; + } + } + } /*cdf*/ + + /* O(n). restores .cdf_valid_flag */ + void renormalize() { + /* [1] compute sum of probability weights + * [2] rescale all buckets so sum is 1. + * [3] restore .cdf_valid_flag + */ + + /* [1] */ + double lo_sum_prob = 0.0; + for (ProbabilityBucket & b : this->lo_v_) { + assert(lo_sum_prob >= 0.0); + lo_sum_prob += b.weight(); + } + + double hi_sum_prob = 0.0; + for (ProbabilityBucket & b : this->hi_v_) { + assert(hi_sum_prob >= 0.0); + hi_sum_prob += b.weight(); + } + + assert(lo_sum_prob + hi_sum_prob > 0.0); + + /* [2] */ + double renorm_factor = 1.0 / (lo_sum_prob + hi_sum_prob); + + { + double lo_tail = lo_sum_prob; + + /* iterate over buckets < .ref_value, in /descending/ domain order */ + for (ProbabilityBucket & b : this->lo_v_) { + b.scale_weight(renorm_factor); + b.assign_cdf(lo_tail); + + /* reduce tail after assign cdf to b, since + * lo_tail includes b's probability weight + */ + lo_tail -= b.weight(); + + /* numerical roundoff can cause this */ + if (lo_tail < 0.0) + lo_tail = 0.0; + } + } + + { + double cum_prob = lo_sum_prob; + + /* iterate over buckets >= .refvalue, in /ascending/ domain order */ + for (ProbabilityBucket & b : this->hi_v_) { + b.scale_weight(renorm_factor); + + /* increase cum_prob before assign cdf to b, + * since b.cdf should include b's probability weight + */ + cum_prob += b.weight(); + + /* numerical roundoff can cause this */ + if (cum_prob >= 1.0) + cum_prob = 1.0; + + b.assign_cdf(cum_prob); + } + } + + /* [3] */ + this->cdf_valid_flag_ = true; + } /*renormalize*/ + + void check_renormalize() { + if (!this->cdf_valid_flag_) { + this->renormalize(); + } + } /*check_renormalize*/ + + void display(std::ostream & os) const { + os << "cdf_valid_flag_); + os << xtag("bucket_dx", this->bucket_dx_); + os << xtag("ref_value", this->ref_value_); + os << xtag("lz", this->lo_v_.size()); + os << xtag("hz", this->hi_v_.size()); + os << xtag("lo_v", this->lo_v_); + os << xtag("hi_v", this->hi_v_); + os << ">"; + } /*display*/ + + std::string display_string() const { return xo::tostr(*this); } + + private: + ExplicitDist(Domain bucket_dx, Domain ref_value) + : cdf_valid_flag_{true}, + bucket_dx_{bucket_dx}, + ref_value_{ref_value}, + lo_v_{}, + hi_v_{1} + { + assert(bucket_dx_ > 0.0); + + /* must have at least one bucket, since need total probability weight = 1 */ + this->hi_v_[0].assign_weight(1.0); + this->hi_v_[0].assign_cdf(1.0); + } + + ExplicitDist(size_t n, Domain bucket_dx, Domain ref_value) + : cdf_valid_flag_{true}, + bucket_dx_{bucket_dx}, + ref_value_{ref_value}, + lo_v_{}, + hi_v_{n} + { + assert(bucket_dx > 0.0); + /* must have at least one bucket, since need total probability weight = 1 */ + assert(n > 0); + + double w = 1.0 / n; + + for(size_t i = 0; ihi_v_[i].assign_weight(w); + this->hi_v_[i].assign_cdf((i+1)*w); + } + } /*ctor*/ + + private: + /* with lz=.lo_v.size(), hz=hi_v.size(): + * + * .ref_value - .bucket_dx * lz + * | .ref_value + * | | .hi_v[0] .hi_v[hz-1] + * v v v v + * | + * +--+--+ +--+--|--+--+ +--+ + * | | | ... | | | | | ... | | + * +--+--+ +--+--|--+--+ +--+ + * | + * ^ ^ ^ + * .lo_v[lz-1] .lo_v[0] .ref_value + .bucket_dx * hz + * + */ + + /* .cdf_valid_flag: + * -> false whenever .scale_bucket() runs. + * -> true whenever .renormalize() runs. + * + * if false, then *this is 'not a probability distribution': + * Sum b[i].weight != 1.0 (summing over probability buckets .lo_v[], .hi_v[]) + * i + */ + bool cdf_valid_flag_ = true; + + /* width of each bucket */ + Domain bucket_dx_; + + /* natural value here would be 0. + * use .pos_v[] for values >= .ref_value + * use .neg_v[] for values < .ref_value + */ + Domain ref_value_; + + /* buckets for domain values < .ref_value + * + * with dx = .bucket_dx + * .lo_v[i] represents weights in range + * [.ref_value - (i+1) * dx, .ref_value - i * dx) + */ + WeightVector lo_v_; + + /* buckets for domain values > .ref_value + * + * with dx = .bucket_dx + * .hi_v[i] represents weights in range + * [.ref_value + i * dx, .ref_value + (i+1) * dx)) + */ + WeightVector hi_v_; + + }; /*ExplicitDist*/ + + template + inline std::ostream & + operator<<(std::ostream & os, ExplicitDist const & x) { + x.display(os); + return os; + } /*operator<<*/ + } /*namespace distribution*/ +} /*namespace xo*/ + +/* end ExplicitDist.hpp */ diff --git a/xo-distribution/include/xo/distribution/Exponential.hpp b/xo-distribution/include/xo/distribution/Exponential.hpp new file mode 100644 index 00000000..f8dd2594 --- /dev/null +++ b/xo-distribution/include/xo/distribution/Exponential.hpp @@ -0,0 +1,94 @@ +/* @file Exponential.hpp */ + +#pragma once + +#include "xo/distribution/Distribution.hpp" +#include +#include + +namespace xo { + namespace distribution { + /* Exponential probability distribution */ + class Exponential : public Distribution { + public: + explicit Exponential(double lm) : lambda_(lm) {} + + /* exponential probability density: + * + * -lm.x + * p(x) = { lm . e , x > 0 + * { 0 , x <= 0 + * + */ + static double density_impl(double lambda, double x) { + if(x <= 0.0) + return 0.0; + + return lambda * ::exp(-lambda * x); + } /*density_impl*/ + + /* exponential distribution: + * + * -lm.x + * F(x) = 1 - e , x > 0 + * F(x) = 0 , x <= 0 + */ + static double distr_impl(double lambda, double x) { + if(x <= 0.0) + return 0.0; + + return 1.0 - ::exp(-lambda * x); + } /*distr_impl*/ + + /* compute x: F(x)=y, where F(x) + * is the cumulative exponential probability distributio. + * + * -lm.x + * F(x) = 1 - e , x>0 + * + * -lm.x + * e = 1 - F(x) + * + * -lm.x = ln(1 - F(x)) + * + * x = -ln(1 - F(x)) / lm, 0 < F(x) < 1 + */ + static double distr_inverse_impl(double lambda, double Fx) { + if(Fx < 0.0) + return -1.0 * std::numeric_limits::infinity(); + if(Fx >= 1.0) + return +1.0 * std::numeric_limits::infinity(); + + return (-1.0 / lambda) * ::log(1.0 - Fx); + } /*distr_inverse_impl*/ + + double lambda() const { return lambda_; } + + double density(double x) const { + return density_impl(this->lambda_, x); + } /*density*/ + + double distribution(double x) const { + return distr_impl(this->lambda_, x); + } /*distribution*/ + + double distribution_inverse(double y) const { + return distr_inverse_impl(this->lambda_, y); + } /*distribution_inverse*/ + + // ----- inherited from Distribution ----- + + virtual double cdf(double const & x) const override { + return distribution(x); + } /*cdf*/ + + private: + /* intensity parameter. + * require: lambda > 0 + */ + double lambda_ = 1.0; + }; /*Exponential*/ + } /*namespace distribution*/ +} /*namespace xo*/ + +/* end Exponential.cpp */ diff --git a/xo-distribution/include/xo/distribution/KolmogorovSmirnov.hpp b/xo-distribution/include/xo/distribution/KolmogorovSmirnov.hpp new file mode 100644 index 00000000..ee0ec160 --- /dev/null +++ b/xo-distribution/include/xo/distribution/KolmogorovSmirnov.hpp @@ -0,0 +1,257 @@ +/* @file KolmogorovSmirnov.hpp */ + +#pragma once + +#include "Distribution.hpp" +#include "xo/indentlog/scope.hpp" +#include +#include + +namespace xo { + /* Kolmogorov-Smirnov probability distribution */ + namespace distribution { + + class KolmogorovSmirnov : public Distribution { + public: + KolmogorovSmirnov() = default; + + /* kolmogorov-smirnov (KS) cumulative distribution + * + * cdf is defined by the series: + * + * +oo / j 2 2 \ + * P1(x) = 1 + 2 Sum | (-1) .exp(-2.j .x ) | + * j=1 \ / + * + * this converges rapidly for x > 1. expanding the first few terms: + * + * / 2 2 2 2 2 2 2 \ + * P1(x) ~= 1 + 2.| -exp(-2.x ) + exp(-2.2 .x ) - exp(-2.3 .x ) + exp(-2.4 .x ) | + * \ / + * + * / 2 2 4 2 9 2 16 \ + * = 1 + 2.| -exp(-2.x ) + exp(-2.x ) - exp(2.x ) + exp(-2.x ) | + * \ / + * + * / 4 9 16 \ + * = 1 + 2.| T(x) + T(x) + T(x) + T(x) | + * \ / + * + * with T(x) = .term_aux(1, x) + * + * with x=1: + * term1_aux(1, 1): -0.135335 + * term1_aux(2, 1): 3.35463e-4 + * term1_aux(3, 1): -1.52300e-8 + * term1_aux(4, 1): 1.26642e-14 + * term1_aux(5, 1): -1.92875e-22 + * + * with x=1.18: + * term1_aux(1, 1): -0.0617414 + * term1_aux(2, 1): 1.45314e-5 + * term1_aux(3, 1): -1.30374e-11 + * term1_aux(4, 1): 4.45890e-20 + * term1_aux(5, 1): -5.83124e-31 + * + * ---------------------------------------------------------------- + * + * There's an alternative series for the KS distribution, + * that converges rapidly for small x (x < ~1.18) + * + * / 2 2 \ + * sqrt(2.pi) +oo / | (2j - 1) .pi | \ + * P2(x) = ---------- . Sum | exp | - ------------ | | + * x j=1 \ | 2 | / + * \ 8.x / + * + * / 2 \ + * | pi | + * witu U(x) = exp | - ---- | + * | 2 | + * \ 8.x / + * we have + * / 2 \ + * sqrt(2.pi) +oo | (2j - 1) | + * P2(x) = ---------- . Sum | U(x) | + * x j=1 | | + * \ / + * + * / \ + * sqrt(2.pi) | 9 25 49 | + * = ---------- . | U(x) + U(x) + U(x) + U(x) + .. | + * x | | + * \ / + * + * with x=1.0: + * term2_aux(1, 1): 0.291213 + * term2_aux(2, 1): 1.50625e-5 + * term2_aux(3, 1): 4.02964e-14 + * term2_aux(4, 1): 5.57599e-27 + * + * with x=1.18: + * term2_aux(1, 1.18): 0.412292 + * term2_aux(2, 1.18): 3.44223e-4 + * term2_aux(3, 1.18): 2.39945e-10 + * term2_aux(4, 1.18): 1.39652e-19 + */ + static double term1_aux(uint32_t j, double x) { + double f = ::exp(-2.0 * x * x); + /* sgn: + * +1 for j in {2, 4, 6, ..} + * -1 for j in {1, 3, 5, ..} + */ + int32_t sgn = (((j & 0x1) == 0) ? +1 : -1); + + if(j == 1) { + return sgn * f; + } else { + return sgn * ::pow(f, j*j); + } + } /*term1_aux*/ + + /* implements P1(x) above; truncating at 4 terms. + * use .distr1() for x > ~1.18 + */ + static double distr1_impl(double x) { + double f = term1_aux(1.0, x); + double f2 = f*f; /*f^2*/ + double f4 = f2*f2; /*f^4*/ + double f8 = f4*f4; /*f^8*/ + double f9 = f8*f; /*f^9*/ + double f16 = f8*f8; /*f^16*/ + + double r = ((f16 + f9) + f4) + f; + + return 1.0 + 2.0*r; + } /*distr1_impl*/ + + /* pi: 3.141592... */ + static constexpr double c_pi = M_PI; + /* pi^2 / 8 */ + static constexpr double c_pi2_8 = 0.125 * c_pi * c_pi; + + /* computes + * + * (2j-1)^2 + * U(x) + * + * require: + * - j >= 1 + */ + static double term2_aux(uint32_t j, double x) { + double u = ::exp(-c_pi2_8 / (x * x)); + + if(j == 1) { + return u; + } else { + uint32_t j2m1 = 2*j - 1; + + return ::pow(u, j2m1 * j2m1); + } + } /*term2_aux*/ + + /* implements P2(x) above; truncating at 4 terms */ + static double distr2_impl(double x) { + static double c_sqrt_2pi + = ::sqrt(2.0 * c_pi); + + double scale = c_sqrt_2pi / x; + + double u = term2_aux(1, x); + double u2 = u*u; /*u^2*/ + double u4 = u2*u2; /*u^4*/ + double u8 = u4*u4; /*u^8*/ + double u16 = u8*u8; /*u^16*/ + double u32 = u16*u16; /*u^32*/ + + double u9 = u8*u; /*u^9*/ + double u25 = u16*u8*u; /*u^25*/ + double u49 = u32*u16*u; /*u^49*/ + + double r = ((u49 + u25) + u9) + u; + + return scale * r; + + } /*distr2_impl*/ + + static double distr_impl(double x) { + using xo::tostr; + + constexpr char const * c_self = "KolmogorovSmirnov::distr_impl"; + + if(x < 0.0) + throw std::runtime_error(tostr(c_self, "KS(x) cdf defined for x>=0")); + + if(x == 0.0) + return 0; + + if(x < 1.18) { + /* P2(x) converges fastest */ + return distr2_impl(x); + } else { + /* P1(x) converges fastest */ + return distr1_impl(x); + } + } /*distr_impl*/ + + /* p-value for significance of a particular value of the KS-statistic + * obtain this statistic for a sample distribution using + * Empirical.ks_stat_1sided(dexp) + * for some target distribution dexp + * + * ne. + * for 1-sided test: #of points in sample, i.e. Empirical.n_sample() + * for 2-sided test: (n1 . n2) / (n1 + n2) + * D. + * max difference between observed and expected cumulative distributions. + * see Empirical.ks_stat_1sided() + */ + static double ks_pvalue(uint32_t ne, double D) + { + using xo::scope; + using xo::xtag; + + constexpr bool logging_enabled_flag = false; + + scope log(XO_DEBUG(logging_enabled_flag)); + + double ne_sqrt = ::sqrt(ne); + + /* argument to KS-distribution. + * x in [0, +oo) + * + * A large value for x -> small value for 1 - KSdist(x), + * i.e. represents probability that a sample of size ne with a + * KS-statistic of magnitude D or larger, could be drawn from + * distribution dexp. + */ + double x = ne_sqrt + 0.12 + (0.11 / ne_sqrt); + + double xD = x * D; + + double pvalue = 1.0 - distr_impl(xD); + + log && log(xtag("ne", ne), + xtag("D", D), + xtag("ne_sqrt", ne_sqrt), + xtag("x", x), + xtag("xD", xD), + xtag("pvalue", pvalue)); + + return pvalue; + } /*ks_pvalue*/ + + /* cumulative distribution function */ + double distribution(double x) const { + return distr_impl(x); + } /*distribution*/ + + // ----- inherited from Distribution ----- + virtual double cdf(double const & x) const { + return this->distribution(x); + } /*cdf*/ + }; /*KolmogorovSmirnov*/ + } /*namespace distribution*/ +} /*namespace xo*/ + +/* end KolmogorovSmirnov.hpp */ diff --git a/xo-distribution/include/xo/distribution/Normal.hpp b/xo-distribution/include/xo/distribution/Normal.hpp new file mode 100644 index 00000000..f3ab1498 --- /dev/null +++ b/xo-distribution/include/xo/distribution/Normal.hpp @@ -0,0 +1,54 @@ +/* @file Normal.hpp */ + +#pragma once + +#include "Distribution.hpp" +#include + +namespace xo { + namespace distribution { + /* the guassian distribution, with mean 0 and variance 1 + */ + class Normal : public Distribution { + public: + Normal() = default; + + /* N(0,1): mean 0, sdev 1 */ + static rp unit() { return new Normal(); } + + /* normal probability density: + * + * x^2 + * -(1/2) 1/2 + * p(x) = e / (2.pi) + */ + static double density(double x) { + static double c_sqrt_2pi = ::sqrt(2 * M_PI); + + return ::exp(-0.5 * x * x) / c_sqrt_2pi; + } /*density*/ + + /* cumulative distribution function for N(0,1): + * + * / x + * | + * | p(x).dx + * | + * / -oo + * + * where p(x) is the normal density function p(x) = e^[-x^2/2] + */ + static double cdf_impl(double x) { + return 0.5 * std::erfc(-M_SQRT1_2 * x); + } /*cdf_impl*/ + + // ----- inherited from Distribution ----- + + virtual double cdf(double const & x) const override { + return cdf_impl(x); + } /*cdf*/ + }; /*Normal*/ + } /*namespace distribution*/ +} /*namespace xo*/ + +/* end Normal.hpp */ diff --git a/xo-distribution/include/xo/distribution/StdEmpirical.hpp b/xo-distribution/include/xo/distribution/StdEmpirical.hpp new file mode 100644 index 00000000..d4475510 --- /dev/null +++ b/xo-distribution/include/xo/distribution/StdEmpirical.hpp @@ -0,0 +1,226 @@ +/* @file StdEmpirical.hpp */ + +#pragma once + +#include "Empirical.hpp" +#include "xo/ordinaltree/RedBlackTree.hpp" +#include "xo/indentlog/scope.hpp" +#include +#include + +namespace xo { + namespace distribution { + /* an empirical distribution over a given domain + * (e.g. double as proxy for IR), + * obtained by sorting equally-weighted samples + */ + template + class StdEmpirical : public Distribution { + public: + using SampleMap = xo::tree::RedBlackTree>; + using const_iterator = typename SampleMap::const_iterator; + + public: + StdEmpirical() = default; + + uint32_t n_sample() const { return n_sample_; } + const_iterator begin() const { return sample_map_.begin(); } + const_iterator end() const { return sample_map_.end(); } + + /* compute kolmogorov-smirnov statistic with a non-sampled distribution. + * if d2 is sampled, should use .ks_stat_2sided() instead + */ + std::pair ks_stat_1sided(Distribution const & d2) const { + using xo::scope; + using xo::xtag; + + constexpr char const * c_self = "Empirical::ks_stat_1sided"; + constexpr bool c_logging_enabled = false; + + scope lscope(c_self, c_logging_enabled); + + double ks_stat = 0.0; + + /* for i'th loop iteration below: + * xj_sum = sum of all x[j] with j<=i + */ + uint32_t xj_sum = 0; + + /* #of sample in this distribution, as double */ + double nr = 1.0 / this->n_sample(); + + /* loop over elements x[i] of this (sampled) distribution, + * compare cdf(x[i]) with d2.cdf(x[i]) + * + * KS stat is the maximum observed difference. + */ + for(auto const & point : this->sample_map_) { + Domain const & xi = point.first; + uint32_t xi_count = point.second; + + xj_sum += xi_count; + + /* p1 = xi_sum / n1, where n1 = .n_sample() */ + double p1 = xj_sum * nr; + double p2 = d2.cdf(xi); + + double dp = std::abs(p1 - p2); + + if(c_logging_enabled) + lscope.log(c_self, + xtag("xi", xi), + xtag("xi_count", xi_count), + xtag("xj_sum", xj_sum), + xtag("p1", p1), + xtag("p2", p2), + xtag("dp", dp)); + + ks_stat = std::max(ks_stat, dp); + } + + return std::pair(this->n_sample(), ks_stat); + } /*ks_stat_1sided*/ + + /* compute kolmogorov-smirnov statistic with a sampled distribution; + * assess likelihood that both samples come from the same population. + */ + std::pair ks_stat_2sided(StdEmpirical const & d2) { + /* loop once over both sample distributions; + * algorithm is O(n1 + n2) for two empirical + * distributions with n1,n2 points respectively + */ + + /* return value observed here */ + double ks_stat = 0.0; + + auto ix1 = this->sample_map_.begin(); + auto end_ix1 = this->sample_map_.end(); + + auto ix2 = d2.sample_map_.begin(); + auto end_ix2 = this->sample_map_.end(); + + uint32_t xj1_sum = 0; + uint32_t xj2_sum = 0; + + uint32_t n1 = this->n_sample(); + uint32_t n2 = d2.n_sample(); + + double nr1 = 1.0 / n1; + double nr2 = 1.0 / n2; + + /* ^ + * 1| **. + * | ...*.. + * | . * + * | ********** + * | * . + * | ......... + * | . * + * | ******** + * | ..*..... + * +-----------------------> + * ^ ^ + * ix1 ix2 + */ + + while ((ix1 != end_ix1) || (ix2 != end_ix2)) { + /* on each iteration, compare sample distributions + * at smallest of (ix1->first, ix2->first) + */ + + bool advance_ix1_flag = false; + bool advance_ix2_flag = false; + + if (ix1 == end_ix1) { + /* only ix2 dereferenceable */ + advance_ix2_flag = true; + } else if (ix2 == end_ix2) { + /* only ix1 dereferenceable */ + advance_ix1_flag = true; + } else { + /* ix1,ix2 both dereferenceable */ + + advance_ix1_flag = (ix1->first <= ix2->first); + advance_ix2_flag = (ix2->first <= ix1->first); + } + + if (advance_ix1_flag) { + xj1_sum += ix1->first; + ++ix1; + } + if (advance_ix2_flag) { + xj2_sum += ix2->first; + ++ix2; + } + + double p1 = xj1_sum * nr1; + double p2 = xj2_sum * nr2; + + double dp = std::abs(p1 - p2); + + ks_stat = std::max(ks_stat, dp); + } + + /* ne = effective #of points to use when comparing 2 sample dist/s */ + double ne = (n1 * n2) / static_cast(n1 + n2); + + return std::pair(ne, ks_stat); + +#ifdef OBSOLETE + /* NOTE: this will cost O(n.log(n)) + * (for empirical distributions with n points) + * if we loop over both sets of points in parallel, + * can get O(n) solution + * + */ + + /* loop over points in *this */ + double ks1 = this->ks_stat_1sided(d2); + /* loop over points in d2 */ + double ks2 = d2.ks_stat_1sided(*this); + + return std::max(ks1, ks2); +#endif + } /*ks_stat_2sided*/ + + // ----- inherited from Empirical ----- + + /* introduce one new sample into this distribution */ + virtual void include_sample(Domain const & x) { + ++(this->n_sample_); + + /* note: xo::tree::RedBlackTree doesn't provide the usual reference result + * from operator[]; it needs to intervene after assignment to update + * order statistics + */ + auto lhs = this->sample_map_[x]; + + lhs += 1; + } /*include_sample*/ + + // ----- inherited from Distribution ----- + + virtual double cdf(Domain const & x) const override { + /* computes #of samples with values <= x */ + uint32_t nx = this->sample_map_.reduce_lub(x, true /*is_closed*/); + size_t n = this->n_sample(); + + return static_cast(nx) / n; + } /*cdf*/ + + private: + /* count #of calls to .include_sample() */ + CounterRep n_sample_ = 0; + + /* .sample_map_[x] counts the #of times + * .include_sample(x) has been called. + */ + SampleMap sample_map_; + }; /*StdEmpirical*/ + + } /*namespace distribution*/ +} /*namespace xo*/ + +/* end StdEmpirical.hpp */ diff --git a/xo-distribution/include/xo/distribution/Uniform.hpp b/xo-distribution/include/xo/distribution/Uniform.hpp new file mode 100644 index 00000000..d701e846 --- /dev/null +++ b/xo-distribution/include/xo/distribution/Uniform.hpp @@ -0,0 +1,57 @@ +/* @file Uniform.hpp */ + +#pragma once + +#include "Distribution.hpp" + +namespace xo { + namespace distribution { + /* uniform distribution on an interval */ + class Uniform : public Distribution { + public: + Uniform(double lo, double hi) : lo_(lo), hi_(hi) {} + + static rp unit() { return new Uniform(0.0, 1.0); } + + static double density_impl(double lo, double hi, double x) { + if (x <= lo) + return 0.0; + if (x >= hi) + return 0.0; + + return 1.0 / (hi - lo); + } /*density_impl*/ + + static double distr_impl(double lo, double hi, double x) { + if (x <= lo) + return 0.0; + if (x >= hi) + return 1.0; + + return (x - lo) / (hi - lo); + } /*distr_impl*/ + + double lo() const { return lo_; } + double hi() const { return hi_; } + + double density(double x) const { + return density_impl(this->lo_, this->hi_, x); + } /*density*/ + + double distribution(double x) const { + return distr_impl(this->lo_, this->hi_, x); + } /*distribution*/ + + // ----- inherited from Distribution ----- + + virtual double cdf(double const & x) const override { + return this->distribution(x); + } /*cdf*/ + + private: + /* Invariant: .lo < .hi */ + double lo_ = 0.0; + double hi_ = 1.0; + }; /*Uniform*/ + } /*namespace distribution*/ +} /*namespace xo*/ diff --git a/xo-distribution/src/distribution/CMakeLists.txt b/xo-distribution/src/distribution/CMakeLists.txt new file mode 100644 index 00000000..41023695 --- /dev/null +++ b/xo-distribution/src/distribution/CMakeLists.txt @@ -0,0 +1,5 @@ +set(SELF_LIB xo_distribution) +set(SELF_SRCS Normal.cpp) + +xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS}) +xo_dependency(${SELF_LIB} refcnt) diff --git a/xo-distribution/src/distribution/Normal.cpp b/xo-distribution/src/distribution/Normal.cpp new file mode 100644 index 00000000..f1a0f9ca --- /dev/null +++ b/xo-distribution/src/distribution/Normal.cpp @@ -0,0 +1,9 @@ +/* @file Normal.cpp */ + +#include "Normal.hpp" + +namespace xo { + +} /*namespace xo*/ + +/* end Normal.cpp */ diff --git a/xo-distribution/utest/CMakeLists.txt b/xo-distribution/utest/CMakeLists.txt new file mode 100644 index 00000000..e768c09a --- /dev/null +++ b/xo-distribution/utest/CMakeLists.txt @@ -0,0 +1,13 @@ +# build unittest distribution/utest + +set(SELF_EXE utest.distribution) +set(SELF_SRCS + distribution_utest_main.cpp + Normal.test.cpp + Uniform.test.cpp) + +xo_add_utest_executable(${SELF_EXE} ${SELF_SRCS}) +xo_self_dependency(${SELF_EXE} xo_distribution) +xo_external_target_dependency(${SELF_EXE} Catch2 Catch2::Catch2) + +# end CMakeLists.txt diff --git a/xo-distribution/utest/Normal.test.cpp b/xo-distribution/utest/Normal.test.cpp new file mode 100644 index 00000000..70f0a7ad --- /dev/null +++ b/xo-distribution/utest/Normal.test.cpp @@ -0,0 +1,24 @@ +/* @file Normal.test.cpp */ + +#include "xo/distribution/Normal.hpp" +#include + +namespace xo { + using xo::distribution::Normal; + + namespace ut { + TEST_CASE("normal", "[distribution]") { + auto n01 = Normal::unit(); + + CHECK(n01->cdf(-3.0) == Approx(0.001349898).margin(1e-9)); + CHECK(n01->cdf(-2.0) == Approx(0.0227501319).margin(1e-9)); + CHECK(n01->cdf(-1.0) == Approx(0.1586552539).margin(1e-9)); + CHECK(n01->cdf(0.0) == 0.5); + CHECK(n01->cdf(1.0) == 1.0 - n01->cdf(-1.0)); + CHECK(n01->cdf(2.0) == 1.0 - n01->cdf(-2.0)); + CHECK(n01->cdf(3.0) == 1.0 - n01->cdf(-3.0)); + } /*TEST_CASE(normal)*/ + } /*namespace ut*/ +} /*namespace xo*/ + +/* end Normal.test.cpp */ diff --git a/xo-distribution/utest/Uniform.test.cpp b/xo-distribution/utest/Uniform.test.cpp new file mode 100644 index 00000000..10153d66 --- /dev/null +++ b/xo-distribution/utest/Uniform.test.cpp @@ -0,0 +1,27 @@ +/* @file Uniform.test.cpp */ + +#include "xo/distribution/Uniform.hpp" +#include + +namespace xo { + using xo::distribution::Uniform; + + namespace ut { + TEST_CASE("uniform", "[distribution]") { + auto u = Uniform::unit(); + + CHECK(u->cdf(-3.0) == 0.0); + CHECK(u->cdf(-2.0) == 0.0); + CHECK(u->cdf(-1.0) == 0.0); + CHECK(u->cdf(0.0) == 0.0); + CHECK(u->cdf(0.05) == 0.05); + CHECK(u->cdf(0.5) == 0.5); + CHECK(u->cdf(0.95) == 0.95); + CHECK(u->cdf(1.0) == 1.0); + CHECK(u->cdf(2.0) == 1.0); + CHECK(u->cdf(3.0) == 1.0); + } /*TEST_CASE(uniform)*/ + } /*namespace ut*/ +} /*namespace xo*/ + +/* end Uniform.test.cpp */ diff --git a/xo-distribution/utest/distribution_utest_main.cpp b/xo-distribution/utest/distribution_utest_main.cpp new file mode 100644 index 00000000..b5e05a73 --- /dev/null +++ b/xo-distribution/utest/distribution_utest_main.cpp @@ -0,0 +1,6 @@ +/* file distribution_utest_main.cpp */ + +#define CATCH_CONFIG_MAIN +#include "catch2/catch.hpp" + +/* end distribution_utest_main.cpp */