From dd41635a5652d267f0878bb04b03eb85572be97f Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Sat, 22 Nov 2025 20:13:33 -0500 Subject: [PATCH] xo-tokenizer: refactor to correct accounting for line/consume/errpos --- include/xo/alloc/ArenaAlloc.hpp | 86 ++++++++++++++++++++++--- src/alloc/ArenaAlloc.cpp | 107 +++++++++++++++++++++++--------- 2 files changed, 157 insertions(+), 36 deletions(-) diff --git a/include/xo/alloc/ArenaAlloc.hpp b/include/xo/alloc/ArenaAlloc.hpp index da67f8f2..e0bfed2f 100644 --- a/include/xo/alloc/ArenaAlloc.hpp +++ b/include/xo/alloc/ArenaAlloc.hpp @@ -18,11 +18,11 @@ namespace xo { * allocation order: * -----------------------> * - * <----------------- .size() ------------------> - * <----------------- .committed() ---------------> + * <----------------- .size(), .reserved() ---------------------------> + * <----------------- .committed() -------------> * - * <-------allocated------><--------free--------> <---uncommitted----> - * XXXXXXXXXXXXXXXXXXXXXXXX______________________ .................... + * <-------allocated------><--------free--------><-----uncommitted----> + * XXXXXXXXXXXXXXXXXXXXXXXX______________________...................... * ^ ^ ^ ^ ^ * lo checkpoint free limit hi * @@ -31,12 +31,77 @@ namespace xo { * > < .before_checkpoint() * > < .after_checkpoint() * + * lifetime: + * + * 1. initial state after ctor + * + * >< committed()=0 + * <---------------------------uncommitted----------------------------> + * .................................................................... + * ^ ^ + * lo hi + * checkpoint + * free + * limit + * + * 1a. one call to ::mmap() + * 1b. vm address space [lo,hi) is reserved + * 1c. address space [lo,hi) is inaccessible. no read|write|execute permission + * + * 2. after first allocation of n bytes + * + * <--committed---> + * <--free--><--------------------uncommitted--------------------> + * > <- allocated + * XXXXXX__________..................................................... 
+ * ^ ^ ^ ^ + * lo lo+n limit hi + * ^ free + * checkpoint + * + * 2a. committed just enough hugepages (2mb each) to accommodate n, + * i.e. expand-on-demand: + * - one call to ::mprotect() + * - .limit = .lo + (k+1) * .hugepage_z for some integer k>=0 + * - k * .hugepage_z < n <= (k+1) * .hugepage_z + * 2b. expect immediate cost 1-5us, includes: + * - TLB flush + * invalidate TLB entries for committed range on all cores that this + * process' threads have run on since process inception. + * Also, if a kernel thread has run on one of said cores, it may + * have borrowed our TLB entries + * - page table update + * write to entry for each vm page + * - kernel overhead 100-1000 cycles (< 1us) + * 2c. expect deferred cost 1us-2us per hugepage: + * - committed pages aren't backed by physical memory until + * first touched; minor page fault on first access for each page. + * - so about 256-512us for 1MB + * 3. after .expand(z) + * + * <-------------committed------------> + * <------------free------------><----------uncommitted----------> + * > <- allocated + * XXXXXX______________________________................................. + * ^ ^ ^ ^ + * lo lo+n limit hi + * ^ free + * checkpoint + * + * 3a. same as case 2. but without advancing .free pointer. + * + * 4. after dtor + * + * 4a. all memory returned to o/s, no longer reserved. + * - one call to ::munmap() + * * @endtext * * Design Notes: * - non-copyable, non-moveable - * - always heap-allocated * - @ref lo_ <= @ref checkpoint_ <= @ref free_ <= @ref limit_ <= @ref hi_ + * - memory for ArenaAlloc itself (not the memory it allocates), ~100 bytes + * always heap allocated. Use ArenaAlloc::make() * - memory obtained from mmap(), not heap * - memory addresses are stable. Expand storage by committing VM pages. 
* - @ref lo_ is aligned on VM page size (guaranteed by mmap()) @@ -55,7 +120,7 @@ namespace xo { /** Create allocator with capacity @p z, * Reserve memory addresses for @p z bytes, - * but don't commit them until needed + * (but don't commit them until needed) **/ static up make(const std::string & name, std::size_t z, @@ -127,7 +192,12 @@ namespace xo { std::string name_; /** size of a VM page (from getpagesize()) **/ - std::size_t page_z_; + std::size_t page_z_ = 0; + + /** size of a huge VM page. hardwiring this in ctor (to 2MB). + * larger pages relieve pressure on TLB, but suboptimal if use << 2MB + **/ + std::size_t hugepage_z_ = 0; /** allocator owns memory in range [@ref lo_, @ref hi_) **/ std::byte * lo_ = nullptr; @@ -139,7 +209,7 @@ namespace xo { * older (addresses below checkpoint) * and younger (addresses above checkpoint) **/ - std::byte * checkpoint_; + std::byte * checkpoint_ = nullptr; /** free pointer. memory in range [@ref free_, @ref limit_) available **/ std::byte * free_ptr_ = nullptr; /** soft limit: end of committed virtual memory **/ diff --git a/src/alloc/ArenaAlloc.cpp b/src/alloc/ArenaAlloc.cpp index febbcb61..0a0365e2 100644 --- a/src/alloc/ArenaAlloc.cpp +++ b/src/alloc/ArenaAlloc.cpp @@ -13,37 +13,101 @@ #include namespace xo { + using std::byte; + namespace gc { + namespace { + /* alignment better be a power of 2 */ + std::size_t + align_lub(std::size_t x, std::size_t align) + { + /* e.g: + * align = 4096, x%align = 100 -> dx = 3996 + * align = 4096, x%align = 0 -> dx = 0 + */ + std::size_t dx = (align - (x % align)) % align; + + return x + dx; + } + } + ArenaAlloc::ArenaAlloc(const std::string & name, - std::size_t z, bool debug_flag) + std::size_t z, + bool debug_flag) { scope log(XO_DEBUG(debug_flag), xtag("name", name)); + constexpr size_t c_hugepage_z = 2 * 1024 * 1024; + this->name_ = name; this->page_z_ = getpagesize(); + this->hugepage_z_ = c_hugepage_z; - // reserve virtual memory + // 1. 
need k pagetable entries where k is lub {k | k * .page_z >= z} + // 2. base will be aligned with .page_z but likely not with .hugepage_z + // 3. bad to have misalignment, because misaligned {prefix, suffix} of [base, base+z) + // will use 4k pages instead of 2mb pages + // + // strategy: + // 4. round up z to multiple of c_hugepage_z + // 5. over-request so reserved range contains an aligned subrange of size z + // 6. unmap misaligned prefix + // 7. unmap misaligned suffix. + // 8. enable huge pages for now-aligned remainder of reserved range + // + // Z. note: rejecting inferior MAP_HUGETLB|MAP_HUGE_2MB flags on ::mmap here: + // Za. requires previously-reserved memory in /proc/sys/vm/nr_hugepages + // Zb. reserved pages permanently resident in RAM, never swapped + // Zc. memory cost incurred even if no application is using said pages - void * base = mmap(nullptr, z, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + z = align_lub(z, c_hugepage_z); // 4. + + // 5. + byte * base = reinterpret_cast(::mmap(nullptr, + z + c_hugepage_z, + PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0)); log && log("acquired memory [lo,hi) using mmap", xtag("lo", base), xtag("z", z), - xtag("hi", reinterpret_cast(base) + z)); - - // could use this as fallback.. - //base = (new std::byte [z]); + xtag("hi", reinterpret_cast(base) + z)); if (base == MAP_FAILED) { throw std::runtime_error(tostr("ArenaAlloc: uncommitted allocation failed", xtag("size", z))); } - this->lo_ = reinterpret_cast(base); + byte * aligned_base = reinterpret_cast(align_lub(reinterpret_cast(base), + c_hugepage_z)); + + assert(reinterpret_cast(aligned_base) % c_hugepage_z == 0); + assert(aligned_base >= base); + assert(aligned_base < base + c_hugepage_z); + + if (base < aligned_base) { + size_t prefix = aligned_base - base; + + ::munmap(base, prefix); // 6. 
+ } + + byte * aligned_hi = aligned_base + z; + byte * hi = base + z + c_hugepage_z; + + if (aligned_hi < hi) { + size_t suffix = hi - aligned_hi; + + ::munmap(aligned_hi, suffix); // 7. + } + + ::madvise(aligned_base, z, MADV_HUGEPAGE); // 8. + + this->lo_ = aligned_base; this->committed_z_ = 0; this->checkpoint_ = lo_; this->free_ptr_ = lo_; - this->limit_ = lo_ + z; + this->limit_ = lo_; this->hi_ = lo_ + z; this->debug_flag_ = debug_flag; @@ -52,7 +116,9 @@ namespace xo { xtag("size", z))); } - log && log(xtag("lo", (void*)lo_), xtag("page_z", page_z_)); + log && log(xtag("lo", (void*)lo_), + xtag("page_z", page_z_), + xtag("hugepage_z", hugepage_z_)); } ArenaAlloc::~ArenaAlloc() @@ -64,7 +130,7 @@ namespace xo { if (lo_) { log && log("unmap [lo,hi)", xtag("lo", lo_), xtag("z", hi_ - lo_), xtag("hi", hi_)); - munmap(lo_, hi_ - lo_); + ::munmap(lo_, hi_ - lo_); } // could use this as fallback if we dropped the uncommitted technique //delete [] this->lo_; @@ -86,21 +152,6 @@ namespace xo { z, debug_flag)); } - namespace { - /* alignment better be a power of 2 */ - std::size_t - align_lub(std::size_t x, std::size_t align) - { - /* e.g: - * align = 4096, x%align = 100 -> dx = 3996 - * align = 4096, x%align = 0 -> dx = 0 - */ - std::size_t dx = (align - (x % align)) % align; - - return x + dx; - } - } - bool ArenaAlloc::expand(size_t offset_z) { @@ -118,7 +169,7 @@ namespace xo { xtag("requested", offset_z), xtag("reserved", reserved()))); } - std::size_t aligned_offset_z = align_lub(offset_z, page_z_); + std::size_t aligned_offset_z = align_lub(offset_z, hugepage_z_); std::byte * commit_start = lo_ + committed_z_; std::size_t add_commit_z = aligned_offset_z - committed_z_; @@ -130,7 +181,7 @@ namespace xo { xtag("add_commit_z", add_commit_z), xtag("commit_end", commit_start + add_commit_z)); - if (mprotect(commit_start, add_commit_z, PROT_READ | PROT_WRITE) != 0) { + if (::mprotect(commit_start, add_commit_z, PROT_READ | PROT_WRITE) != 0) { throw 
std::runtime_error(tostr("ArenaAlloc::expand: commit failure", xtag("committed_z", committed_z_), xtag("add_commit_z", add_commit_z)));