Skip to content

Commit

Permalink
[gc_heap] Refactor to Space object.
Browse files Browse the repository at this point in the history
This is the "right" design; femtolisp is a bit awkward.

Also start a demo of heap growth.  Not sure if we'll use it.
  • Loading branch information
Andy Chu committed Dec 12, 2020
1 parent 719e232 commit b394bae
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 56 deletions.
67 changes: 67 additions & 0 deletions mycpp/demo/heap_growth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/usr/bin/env python2
"""
heap_growth.py
"""
from __future__ import print_function

import sys


class Space(object):
def __init__(self, space_size):
self.filled_size = 0
self.space_size = space_size


def Simulate(spaces, alloc_sizes):
# TODO:
# - how to simulate garbage too
# - simulate semi-spaces
# - eventually we could TIGHTEN the heap? Actually we might get that for
# free?

# Input:
# - Stream of Allocation Sizes
# Output:
# - Whether we should collect now
# - this happens as rarely as possible, only when we have no space
# - Whether we should grow, and HOW MUCH (2x, 4x)
# - this happens AFTER a collection, if we don't have much space left
# - And we try to keep the sizes even

space = spaces[0]

for i, a in enumerate(alloc_sizes):
if space.filled_size + a > space.space_size:
do_collect = True
else:
do_collect = False

# Assume we didn't collect anything
while float(space.filled_size) / space.space_size >= 0.8:
space.space_size *= 2

space.filled_size += a

yield a, space.filled_size, space.space_size, do_collect


def main(argv):
initial_size = 256
spaces = [Space(initial_size), Space(initial_size)]

fmt = '%10s %10s %10s %10s'
print(fmt % ('alloc', 'filled', 'space max', 'collect'))

#alloc_sizes = range(50, 100)
alloc_sizes = range(0, 10000, 400) # big allocations
for row in Simulate(spaces, alloc_sizes):
print(fmt % row)


if __name__ == '__main__':
try:
main(sys.argv)
except RuntimeError as e:
print('FATAL: %s' % e, file=sys.stderr)
sys.exit(1)
52 changes: 27 additions & 25 deletions mycpp/gc_heap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,13 @@ void Heap::Collect(bool must_grow) {
num_collections_++;
#endif

scan_ = to_space_; // boundary between black and gray
free_ = to_space_; // where to copy new entries
char* scan = to_space_.begin_; // boundary between black and gray
free_ = to_space_.begin_; // where to copy new entries

if (grew_) {
limit_ = to_space_ + space_size_ * 2;
limit_ = to_space_.begin_ + to_space_.size_ * 2;
} else {
limit_ = to_space_ + space_size_;
limit_ = to_space_.begin_ + to_space_.size_;
}

#if GC_DEBUG
Expand Down Expand Up @@ -118,8 +118,8 @@ void Heap::Collect(bool must_grow) {
}
}

while (scan_ < free_) {
auto obj = reinterpret_cast<Obj*>(scan_);
while (scan < free_) {
auto obj = reinterpret_cast<Obj*>(scan);
switch (obj->heap_tag_) {
case Tag::FixedSize: {
auto fixed = reinterpret_cast<LayoutFixed*>(obj);
Expand Down Expand Up @@ -151,7 +151,7 @@ void Heap::Collect(bool must_grow) {

// other tags like Tag::Opaque have no children
}
scan_ += obj->obj_len_;
scan += obj->obj_len_;
}

#if GC_DEBUG
Expand All @@ -160,21 +160,22 @@ void Heap::Collect(bool must_grow) {
// num_live_objs_);
#endif

// Subtle logic for growing the heap. Copied from femtolisp.
//
// Happy Path:
// The collection brought us from "full" to more than 20% heap free.
// Then we can continue to allocate int he new space until it is full, and
// not grow the heap.
//
// When There's Memory Pressure:
// 1. There's less than 20% free space left, and we grow the EMPTY
// to_space_
// 2. We set grew_, and the next iteration of Collect() uses a new limit_
// calculated from space_size_ (top of this function)
// 3. That iteration also GROWS ITS EMPTY to_space_ (the other one), and
// resets grew_
//
// Subtle logic for growing the heap. Copied from femtolisp.
//
// Happy Path:
// The collection brought us from "full" to more than 20% heap free.
// Then we can continue to allocate int he new space until it is full, and
// not grow the heap.
//
// When There's Memory Pressure:
// 1. There's less than 20% free space left, and we grow the EMPTY
// to_space_
// 2. We set grew_, and the next iteration of Collect() uses a new limit_
// calculated from space_size_ (top of this function)
// 3. That iteration also GROWS ITS EMPTY to_space_ (the other one), and
// resets grew_
//
#if 0
if (grew_ || must_grow || (limit_ - free_) < (space_size_ / 5)) {
#if GC_DEBUG
log("GROWING HEAP");
Expand All @@ -198,11 +199,12 @@ void Heap::Collect(bool must_grow) {
// invariant of the space we will allocate from next time.
memset(from_space_, 0, space_size_);
}
#endif

// Swap spaces for next collection.
char* tmp = from_space_;
from_space_ = to_space_;
to_space_ = tmp;
char* tmp = from_space_.begin_;
from_space_.begin_ = to_space_.begin_;
to_space_.begin_ = tmp;

#if 0
log("free_ %p scan_ %p limit_ %p", free_, scan_, limit_);
Expand Down
71 changes: 48 additions & 23 deletions mycpp/gc_heap.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,32 +124,45 @@ const int kMaxRoots = 1024; // related to C stack size

// #define GC_DEBUG 1

class Space {
public:
Space() {
}
void Init(int space_size) {
begin_ = static_cast<char*>(malloc(space_size));
size_ = space_size;
// Slab scanning relies on 0 bytes (nullptr). e.g. for a List<Token*>*.
// Note: I noticed that memset() of say 400 MiB is pretty expensive. Does
// it makes sense to zero the slabs instead?
memset(begin_, 0, space_size);
}

void Resize(int multiple) {
}

char* begin_;
int size_; // number of bytes
};

class Heap {
public:
Heap() { // default constructor does nothing -- relies on zero initialization
}

// Real initialization with the initial heap size. The heap grows with
// allocations.
void Init(int num_bytes) {
from_space_ = static_cast<char*>(malloc(num_bytes));
to_space_ = static_cast<char*>(malloc(num_bytes));
limit_ = from_space_ + num_bytes;
void Init(int space_size) {
// Allocate and memset()
from_space_.Init(space_size);
to_space_.Init(space_size);

free_ = from_space_; // where we allocate from
scan_ = nullptr;
free_ = from_space_.begin_; // where we allocate from
limit_ = free_ + space_size;

space_size_ = num_bytes;
grew_ = false;

roots_top_ = 0;

// Slab scanning relies on 0 bytes (nullptr). e.g. for a List<Token*>*.
// Note: I noticed that memset() of say 400 MiB is pretty expensive. Does
// it makes sense to zero the slabs instead?
memset(from_space_, 0, num_bytes);
memset(to_space_, 0, num_bytes);

#if GC_DEBUG
num_collections_ = 0;
num_heap_growths_ = 0;
Expand All @@ -158,6 +171,22 @@ class Heap {
#endif
}

// TODO: Refactor into:
//
// Allocate()
// Collect()
// Is Swap() separate from Collect()?
// Grow(space, int multiple) // explicit arg!
//
// Later optimization:
//
// bool AlmostFull() after Collect()
//
// struct Space {
// char* begin;
// int size; // for growth
// };

void* Allocate(int num_bytes) {
char* p = free_;
int n = aligned(num_bytes);
Expand Down Expand Up @@ -233,24 +262,20 @@ class Heap {
log("num heap growths = %d", num_heap_growths_);
log("num forced heap growths = %d", num_forced_growths_);
log("num live objects = %d", num_live_objs_);
log("heap size = %d", space_size_);

log("from_space_ %p", from_space_);
log("to_space %p", to_space_);
log("from_space_ %p", from_space_.begin_);
log("to_space %p", to_space_.begin_);
log("-----");
}
#endif

char* from_space_; // beginning of the space we're allocating from
char* to_space_; // beginning of the space we should copy to
char* limit_; // end of space we're allocating from
Space from_space_;
Space to_space_;
char* limit_; // end of space we're allocating from

char* scan_; // boundary between black and grey
char* free_; // next place to allocate, from_space_ <= free_ < limit_

int space_size_; // current size of space. NOT redundant with limit_ because
// of resizing
bool grew_; // did the TO SPACE grow on the last collection?
bool grew_; // did the TO SPACE grow on the last collection?

// Stack roots. The obvious data structure is a linked list, but an array
// has better locality.
Expand Down
16 changes: 8 additions & 8 deletions mycpp/gc_heap_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ TEST str_test() {
ASSERT_EQ_FMT(kStrHeaderSize + 7 + 1, str2->obj_len_, "%d");

// Make sure they're on the heap
int diff1 = reinterpret_cast<char*>(str1) - gHeap.from_space_;
int diff2 = reinterpret_cast<char*>(str2) - gHeap.from_space_;
int diff1 = reinterpret_cast<char*>(str1) - gHeap.from_space_.begin_;
int diff2 = reinterpret_cast<char*>(str2) - gHeap.from_space_.begin_;
ASSERT(diff1 < 1024);
ASSERT(diff2 < 1024);

Expand Down Expand Up @@ -140,8 +140,8 @@ TEST list_test() {
ASSERT_EQ_FMT(24, list2->obj_len_, "%d");

// Make sure they're on the heap
int diff1 = reinterpret_cast<char*>(list1) - gHeap.from_space_;
int diff2 = reinterpret_cast<char*>(list2) - gHeap.from_space_;
int diff1 = reinterpret_cast<char*>(list1) - gHeap.from_space_.begin_;
int diff2 = reinterpret_cast<char*>(list2) - gHeap.from_space_.begin_;
ASSERT(diff1 < 1024);
ASSERT(diff2 < 1024);

Expand Down Expand Up @@ -183,7 +183,7 @@ TEST list_test() {
ASSERT_EQ_FMT(88, list1->index(7), "%d");
ASSERT_EQ_FMT(8, len(list1), "%d");

int d_slab = reinterpret_cast<char*>(list1->slab_) - gHeap.from_space_;
int d_slab = reinterpret_cast<char*>(list1->slab_) - gHeap.from_space_.begin_;
ASSERT(d_slab < 1024);

log("list1_ = %p", list1);
Expand Down Expand Up @@ -225,8 +225,8 @@ TEST dict_test() {
ASSERT_EQ(nullptr, dict1->values_);

// Make sure they're on the heap
int diff1 = reinterpret_cast<char*>(dict1) - gHeap.from_space_;
int diff2 = reinterpret_cast<char*>(dict2) - gHeap.from_space_;
int diff1 = reinterpret_cast<char*>(dict1) - gHeap.from_space_.begin_;
int diff2 = reinterpret_cast<char*>(dict2) - gHeap.from_space_.begin_;
ASSERT(diff1 < 1024);
ASSERT(diff2 < 1024);

Expand Down Expand Up @@ -433,7 +433,7 @@ void ShowRoots(const Heap& heap) {
log(" %p", raw);

// Raw pointer is on the heap.
int diff2 = reinterpret_cast<char*>(raw) - gHeap.from_space_;
int diff2 = reinterpret_cast<char*>(raw) - gHeap.from_space_.begin_;
// log("diff2 = %d", diff2);
assert(diff2 < 2048);

Expand Down
3 changes: 3 additions & 0 deletions mycpp/gc_stress_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,14 @@ int main(int argc, char** argv) {
GREATEST_MAIN_BEGIN();

RUN_TEST(str_simple_test);
// TODO: Restore growth
#if 0
RUN_TEST(str_growth_test);
RUN_TEST(list_append_test);
RUN_TEST(list_slice_append_test);
RUN_TEST(list_str_growth_test);
RUN_TEST(dict_growth_test);
#endif

GREATEST_MAIN_END(); /* display results */
return 0;
Expand Down

0 comments on commit b394bae

Please sign in to comment.