xo-tokenizer: refactor to correct accounting for line/consume/errpos
This commit is contained in:
parent
2c21eede1f
commit
dd41635a56
2 changed files with 159 additions and 38 deletions
|
|
@ -18,11 +18,11 @@ namespace xo {
|
||||||
* allocation order:
|
* allocation order:
|
||||||
* ----------------------->
|
* ----------------------->
|
||||||
*
|
*
|
||||||
* <----------------- .size() ------------------>
|
* <----------------- .size(), .reserved() --------------------------->
|
||||||
* <----------------- .committed() --------------->
|
* <----------------- .committed() ------------->
|
||||||
*
|
*
|
||||||
* <-------allocated------><--------free--------> <---uncommitted---->
|
* <-------allocated------><--------free--------><-----uncommitted---->
|
||||||
* XXXXXXXXXXXXXXXXXXXXXXXX______________________ ....................
|
* XXXXXXXXXXXXXXXXXXXXXXXX______________________......................
|
||||||
* ^ ^ ^ ^ ^
|
* ^ ^ ^ ^ ^
|
||||||
* lo checkpoint free limit hi
|
* lo checkpoint free limit hi
|
||||||
*
|
*
|
||||||
|
|
@ -31,12 +31,77 @@ namespace xo {
|
||||||
* > < .before_checkpoint()
|
* > < .before_checkpoint()
|
||||||
* > < .after_checkpoint()
|
* > < .after_checkpoint()
|
||||||
*
|
*
|
||||||
|
* lifetime:
|
||||||
|
*
|
||||||
|
* 1. initial state after ctor
|
||||||
|
*
|
||||||
|
* >< committed()=0
|
||||||
|
* <---------------------------uncommitted---------------------------->
|
||||||
|
* ....................................................................
|
||||||
|
* ^ ^
|
||||||
|
* lo hi
|
||||||
|
* checkpoint
|
||||||
|
* free
|
||||||
|
* limit
|
||||||
|
*
|
||||||
|
* 1a. one call to ::mmap()
|
||||||
|
* 1b. vm address space [lo,hi) is reserved
|
||||||
|
* 1c. address space [lo,hi) is inaccessible. no read|write|execute permission
|
||||||
|
*
|
||||||
|
* 2. after first allocation of n bytes
|
||||||
|
*
|
||||||
|
* <--committed--->
|
||||||
|
* <--free--><--------------------uncommitted-------------------->
|
||||||
|
* > <- allocated
|
||||||
|
* XXXXXX__________.....................................................
|
||||||
|
* ^ ^ ^ ^
|
||||||
|
* lo lo+n limit hi
|
||||||
|
* ^ free
|
||||||
|
* checkpoint
|
||||||
|
*
|
||||||
|
* 2a. committed just enough hugepages (2mb each) to accommodate n,
|
||||||
|
* i.e. expand-on-demand:
|
||||||
|
* - one call to ::mprotect()
|
||||||
|
* - .limit = .lo + (k+1) * .hugepage_z for some integer k>=0
|
||||||
|
* - k * .hugepage_z < n <= (k+1) * .hugepage_z
|
||||||
|
* 2b. expect immediate cost 1-5us, includes:
|
||||||
|
* - TLB flush
|
||||||
|
* invalidate TLB entries for committed range on all cores that this
|
||||||
|
* process' threads have run on since process inception.
|
||||||
|
* Also, if a kernel thread has run on one of said cores, it may
|
||||||
|
* have borrowed our TLB entries
|
||||||
|
* - page table update
|
||||||
|
* write to entry for each vm page
|
||||||
|
* - kernel overhead 100-1000 cycles (< 1us)
|
||||||
|
* 2c. expect deferred cost 1us-2us per hugepage:
|
||||||
|
* - committed pages aren't backed by physical memory until
|
||||||
|
* first touched; minor page fault on first access for each page.
|
||||||
|
* - so about 256-512us for 1MB
|
||||||
|
* 3. after .expand(z)
|
||||||
|
*
|
||||||
|
* <-------------committed------------>
|
||||||
|
* <------------free------------><---------uncommitted----------->
|
||||||
|
* > <- allocated
|
||||||
|
* XXXXXX______________________________.................................
|
||||||
|
* ^ ^ ^ ^
|
||||||
|
* lo lo+n limit hi
|
||||||
|
* ^ free
|
||||||
|
* checkpoint
|
||||||
|
*
|
||||||
|
* 3a. same as case 2. but without advancing .free pointer.
|
||||||
|
*
|
||||||
|
* 4. after dtor
|
||||||
|
*
|
||||||
|
* 4a. all memory returned to o/s, no longer reserved.
|
||||||
|
* - one call to ::munmap()
|
||||||
|
*
|
||||||
* @endtext
|
* @endtext
|
||||||
*
|
*
|
||||||
* Design Notes:
|
* Design Notes:
|
||||||
* - non-copyable, non-moveable
|
* - non-copyable, non-moveable
|
||||||
* - always heap-allocated
|
|
||||||
* - @ref lo_ <= @ref checkpoint_ <= @ref free_ <= @ref limit_ <= @ref hi_
|
* - @ref lo_ <= @ref checkpoint_ <= @ref free_ <= @ref limit_ <= @ref hi_
|
||||||
|
* - memory for ArenaAlloc itself (not the memory it allocates), ~100 bytes
|
||||||
|
* always heap allocated. Use ArenaAlloc::make()
|
||||||
* - memory obtained from mmap(), not heap
|
* - memory obtained from mmap(), not heap
|
||||||
* - memory addresses are stable. Expand storage by committing VM pages.
|
* - memory addresses are stable. Expand storage by committing VM pages.
|
||||||
* - @ref lo_ is aligned on VM page size (guaranteed by mmap())
|
* - @ref lo_ is aligned on VM page size (guaranteed by mmap())
|
||||||
|
|
@ -55,7 +120,7 @@ namespace xo {
|
||||||
|
|
||||||
/** Create allocator with capacity @p z,
|
/** Create allocator with capacity @p z,
|
||||||
* Reserve memory addresses for @p z bytes,
|
* Reserve memory addresses for @p z bytes,
|
||||||
* but don't commit them until needed
|
* (but don't commit them until needed)
|
||||||
**/
|
**/
|
||||||
static up<ArenaAlloc> make(const std::string & name,
|
static up<ArenaAlloc> make(const std::string & name,
|
||||||
std::size_t z,
|
std::size_t z,
|
||||||
|
|
@ -127,7 +192,12 @@ namespace xo {
|
||||||
std::string name_;
|
std::string name_;
|
||||||
|
|
||||||
/** size of a VM page (from getpagesize()) **/
|
/** size of a VM page (from getpagesize()) **/
|
||||||
std::size_t page_z_;
|
std::size_t page_z_ = 0;
|
||||||
|
|
||||||
|
/** size of a huge VM page. hardwiring this in ctor (to 2MB).
|
||||||
|
* larger pages relieve pressure on TLB, but suboptimal if use << 2MB
|
||||||
|
**/
|
||||||
|
std::size_t hugepage_z_ = 0;
|
||||||
|
|
||||||
/** allocator owns memory in range [@ref lo_, @ref hi_) **/
|
/** allocator owns memory in range [@ref lo_, @ref hi_) **/
|
||||||
std::byte * lo_ = nullptr;
|
std::byte * lo_ = nullptr;
|
||||||
|
|
@ -139,7 +209,7 @@ namespace xo {
|
||||||
* older (addresses below checkpoint)
|
* older (addresses below checkpoint)
|
||||||
* and younger (addresses above checkpoint)
|
* and younger (addresses above checkpoint)
|
||||||
**/
|
**/
|
||||||
std::byte * checkpoint_;
|
std::byte * checkpoint_ = nullptr;
|
||||||
/** free pointer. memory in range [@ref free_, @ref limit_) available **/
|
/** free pointer. memory in range [@ref free_, @ref limit_) available **/
|
||||||
std::byte * free_ptr_ = nullptr;
|
std::byte * free_ptr_ = nullptr;
|
||||||
/** soft limit: end of committed virtual memory **/
|
/** soft limit: end of committed virtual memory **/
|
||||||
|
|
|
||||||
|
|
@ -13,37 +13,101 @@
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
namespace xo {
|
namespace xo {
|
||||||
|
using std::byte;
|
||||||
|
|
||||||
namespace gc {
|
namespace gc {
|
||||||
|
namespace {
|
||||||
|
/* alignment better be a power of 2 */
|
||||||
|
std::size_t
|
||||||
|
align_lub(std::size_t x, std::size_t align)
|
||||||
|
{
|
||||||
|
/* e.g:
|
||||||
|
* align = 4096, x%align = 100 -> dx = 3996
|
||||||
|
* align = 4096, x%align = 0 -> dx = 0
|
||||||
|
*/
|
||||||
|
std::size_t dx = (align - (x % align)) % align;
|
||||||
|
|
||||||
|
return x + dx;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ArenaAlloc::ArenaAlloc(const std::string & name,
|
ArenaAlloc::ArenaAlloc(const std::string & name,
|
||||||
std::size_t z, bool debug_flag)
|
std::size_t z,
|
||||||
|
bool debug_flag)
|
||||||
{
|
{
|
||||||
scope log(XO_DEBUG(debug_flag), xtag("name", name));
|
scope log(XO_DEBUG(debug_flag), xtag("name", name));
|
||||||
|
|
||||||
|
constexpr size_t c_hugepage_z = 2 * 1024 * 1024;
|
||||||
|
|
||||||
this->name_ = name;
|
this->name_ = name;
|
||||||
this->page_z_ = getpagesize();
|
this->page_z_ = getpagesize();
|
||||||
|
this->hugepage_z_ = c_hugepage_z;
|
||||||
|
|
||||||
// reserve virtual memory
|
// 1. need k pagetable entries where k is lub {k | k * .page_z >= z}
|
||||||
|
// 2. base will be aligned with .page_z but likely not with .hugepage_z
|
||||||
|
// 3. bad to have misalignment, because misaligned {prefix, suffix} of [base, base+z)
|
||||||
|
// will use 4k pages instead of 2mb pages
|
||||||
|
//
|
||||||
|
// strategy:
|
||||||
|
// 4. round up z to multiple of c_hugepage_z
|
||||||
|
// 5. over-request so reserved range contains an aligned subrange of size z
|
||||||
|
// 6. unmap misaligned prefix
|
||||||
|
// 7. unmap misaligned suffix.
|
||||||
|
// 8. enable huge pages for now-aligned remainder of reserved range
|
||||||
|
//
|
||||||
|
// Z. note: rejecting inferior MAP_HUGETLB|MAP_HUGE_2MB flags on ::mmap here:
|
||||||
|
// Za. requires previously-reserved memory in /proc/sys/vm/nr_hugepages
|
||||||
|
// Zb. reserved pages permanently resident in RAM, never swapped
|
||||||
|
// Zc. memory cost incurred even if no application is using said pages
|
||||||
|
|
||||||
void * base = mmap(nullptr, z, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
z = align_lub(z, c_hugepage_z); // 4.
|
||||||
|
|
||||||
|
// 5.
|
||||||
|
byte * base = reinterpret_cast<byte *>(::mmap(nullptr,
|
||||||
|
z + c_hugepage_z,
|
||||||
|
PROT_NONE,
|
||||||
|
MAP_PRIVATE | MAP_ANONYMOUS,
|
||||||
|
-1, 0));
|
||||||
|
|
||||||
log && log("acquired memory [lo,hi) using mmap",
|
log && log("acquired memory [lo,hi) using mmap",
|
||||||
xtag("lo", base),
|
xtag("lo", base),
|
||||||
xtag("z", z),
|
xtag("z", z),
|
||||||
xtag("hi", reinterpret_cast<std::byte *>(base) + z));
|
xtag("hi", reinterpret_cast<byte *>(base) + z));
|
||||||
|
|
||||||
// could use this as fallback..
|
|
||||||
//base = (new std::byte [z]);
|
|
||||||
|
|
||||||
if (base == MAP_FAILED) {
|
if (base == MAP_FAILED) {
|
||||||
throw std::runtime_error(tostr("ArenaAlloc: uncommitted allocation failed",
|
throw std::runtime_error(tostr("ArenaAlloc: uncommitted allocation failed",
|
||||||
xtag("size", z)));
|
xtag("size", z)));
|
||||||
}
|
}
|
||||||
|
|
||||||
this->lo_ = reinterpret_cast<std::byte *>(base);
|
byte * aligned_base = reinterpret_cast<byte *>(align_lub(reinterpret_cast<size_t>(base),
|
||||||
|
c_hugepage_z));
|
||||||
|
|
||||||
|
assert(reinterpret_cast<size_t>(aligned_base) % c_hugepage_z == 0);
|
||||||
|
assert(aligned_base >= base);
|
||||||
|
assert(aligned_base < base + c_hugepage_z);
|
||||||
|
|
||||||
|
if (base < aligned_base) {
|
||||||
|
size_t prefix = aligned_base - base;
|
||||||
|
|
||||||
|
::munmap(base, prefix); // 6.
|
||||||
|
}
|
||||||
|
|
||||||
|
byte * aligned_hi = aligned_base + z;
|
||||||
|
byte * hi = base + z + c_hugepage_z;
|
||||||
|
|
||||||
|
if (aligned_hi < hi) {
|
||||||
|
size_t suffix = hi - aligned_hi;
|
||||||
|
|
||||||
|
::munmap(aligned_hi, suffix); // 7.
|
||||||
|
}
|
||||||
|
|
||||||
|
::madvise(aligned_base, z, MADV_HUGEPAGE); // 8.
|
||||||
|
|
||||||
|
this->lo_ = aligned_base;
|
||||||
this->committed_z_ = 0;
|
this->committed_z_ = 0;
|
||||||
this->checkpoint_ = lo_;
|
this->checkpoint_ = lo_;
|
||||||
this->free_ptr_ = lo_;
|
this->free_ptr_ = lo_;
|
||||||
this->limit_ = lo_ + z;
|
this->limit_ = lo_;
|
||||||
this->hi_ = lo_ + z;
|
this->hi_ = lo_ + z;
|
||||||
this->debug_flag_ = debug_flag;
|
this->debug_flag_ = debug_flag;
|
||||||
|
|
||||||
|
|
@ -52,7 +116,9 @@ namespace xo {
|
||||||
xtag("size", z)));
|
xtag("size", z)));
|
||||||
}
|
}
|
||||||
|
|
||||||
log && log(xtag("lo", (void*)lo_), xtag("page_z", page_z_));
|
log && log(xtag("lo", (void*)lo_),
|
||||||
|
xtag("page_z", page_z_),
|
||||||
|
xtag("hugepage_z", hugepage_z_));
|
||||||
}
|
}
|
||||||
|
|
||||||
ArenaAlloc::~ArenaAlloc()
|
ArenaAlloc::~ArenaAlloc()
|
||||||
|
|
@ -64,7 +130,7 @@ namespace xo {
|
||||||
if (lo_) {
|
if (lo_) {
|
||||||
log && log("unmap [lo,hi)", xtag("lo", lo_), xtag("z", hi_ - lo_), xtag("hi", hi_));
|
log && log("unmap [lo,hi)", xtag("lo", lo_), xtag("z", hi_ - lo_), xtag("hi", hi_));
|
||||||
|
|
||||||
munmap(lo_, hi_ - lo_);
|
::munmap(lo_, hi_ - lo_);
|
||||||
}
|
}
|
||||||
// could use this as fallback if we dropped the uncommitted technique
|
// could use this as fallback if we dropped the uncommitted technique
|
||||||
//delete [] this->lo_;
|
//delete [] this->lo_;
|
||||||
|
|
@ -86,21 +152,6 @@ namespace xo {
|
||||||
z, debug_flag));
|
z, debug_flag));
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
|
||||||
/* alignment better be a power of 2 */
|
|
||||||
std::size_t
|
|
||||||
align_lub(std::size_t x, std::size_t align)
|
|
||||||
{
|
|
||||||
/* e.g:
|
|
||||||
* align = 4096, x%align = 100 -> dx = 3996
|
|
||||||
* align = 4096, x%align = 0 -> dx = 0
|
|
||||||
*/
|
|
||||||
std::size_t dx = (align - (x % align)) % align;
|
|
||||||
|
|
||||||
return x + dx;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
ArenaAlloc::expand(size_t offset_z)
|
ArenaAlloc::expand(size_t offset_z)
|
||||||
{
|
{
|
||||||
|
|
@ -118,7 +169,7 @@ namespace xo {
|
||||||
xtag("requested", offset_z), xtag("reserved", reserved())));
|
xtag("requested", offset_z), xtag("reserved", reserved())));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::size_t aligned_offset_z = align_lub(offset_z, page_z_);
|
std::size_t aligned_offset_z = align_lub(offset_z, hugepage_z_);
|
||||||
std::byte * commit_start = lo_ + committed_z_;
|
std::byte * commit_start = lo_ + committed_z_;
|
||||||
std::size_t add_commit_z = aligned_offset_z - committed_z_;
|
std::size_t add_commit_z = aligned_offset_z - committed_z_;
|
||||||
|
|
||||||
|
|
@ -130,7 +181,7 @@ namespace xo {
|
||||||
xtag("add_commit_z", add_commit_z),
|
xtag("add_commit_z", add_commit_z),
|
||||||
xtag("commit_end", commit_start + add_commit_z));
|
xtag("commit_end", commit_start + add_commit_z));
|
||||||
|
|
||||||
if (mprotect(commit_start, add_commit_z, PROT_READ | PROT_WRITE) != 0) {
|
if (::mprotect(commit_start, add_commit_z, PROT_READ | PROT_WRITE) != 0) {
|
||||||
throw std::runtime_error(tostr("ArenaAlloc::expand: commit failure",
|
throw std::runtime_error(tostr("ArenaAlloc::expand: commit failure",
|
||||||
xtag("committed_z", committed_z_),
|
xtag("committed_z", committed_z_),
|
||||||
xtag("add_commit_z", add_commit_z)));
|
xtag("add_commit_z", add_commit_z)));
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue