Skip to content

Commit ff754c1

Browse files
committed
add benchmark for arena2 vs boa gc
1 parent 0442fb6 commit ff754c1

File tree

23 files changed

+3647
-9
lines changed

23 files changed

+3647
-9
lines changed

notes/arena2_vs_boa_gc.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# arena2 vs boa_gc benchmark results
2+
3+
Note author: shruti2522
4+
date: 2026-03-06
5+
6+
This benchmark measures how the `arena2` allocator — a simple bump allocator with `TaggedPtr` headers for liveness — compares against the standard `boa_gc` implementation.
7+
8+
Ran the `arena2_vs_boa_gc` bench suite. It compares oscars' `arena2` against `boa_gc` across node allocation, collection pauses, mixed workloads, and memory pressure.
9+
10+
## Results
11+
12+
### gc_node_allocation
13+
14+
arena2 heavily outperforms boa_gc across all sizes.
15+
- **10 nodes:** arena2 takes ~320 ns vs ~750 ns for boa_gc
16+
- **100 nodes:** arena2 takes ~3.2 µs vs ~6.4 µs for boa_gc
17+
- **1000 nodes:** arena2 takes ~27.3 µs vs ~56.2 µs for boa_gc
18+
19+
This shows that bump allocation into an arena page is consistently more than 2x faster than the standard boa_gc allocation path.
20+
21+
### gc_collection_pause
22+
23+
Similar to allocations, the sweep phase in arena2 is extremely fast compared to boa_gc.
24+
- **100 objects:** arena2 sweeps in ~3.5 µs vs ~7.3 µs for boa_gc
25+
- **500 objects:** arena2 sweeps in ~15.2 µs vs ~32.5 µs for boa_gc
26+
- **1000 objects:** arena2 sweeps in ~29.5 µs vs ~74.9 µs for boa_gc
27+
28+
The linear scan over the contiguous blocks in arena2 during garbage collection cuts the pause times by more than half.
29+
30+
### mixed_workload
31+
32+
This tests repeated allocations spread around `collect()` pauses.
33+
Both allocators performed similarly here. arena2 took ~17.8 µs and boa_gc took ~17.8 µs. So arena2's big speed advantage seems to even out when allocations and collections are mixed together.
34+
35+
### memory_pressure
36+
37+
This tests creating and deleting many objects quickly (make 50, keep 5, collect, repeat 10 times).
38+
Both allocators are equally fast here. arena2 took ~46.0 µs and boa_gc took ~46.6 µs. The cost of freeing whole memory pages versus individual objects appears to balance out.
39+
40+
## Conclusion
41+
42+
`arena2` is much faster for simple allocations and collection sweeps, about twice as fast. In mixed tests and heavy memory tests, they perform about the same.

oscars/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ required-features = ["gc_allocator"]
2323
name = "arena2_vs_arena3"
2424
harness = false
2525

26+
[[bench]]
27+
name = "arena2_vs_boa_gc"
28+
harness = false
29+
2630
[features]
2731
default = ["mark_sweep"]
2832
std = []

oscars/benches/arena2_vs_boa_gc.rs

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};
2+
use oscars::collectors::mark_sweep_arena2::{
3+
Finalize, Gc as OscarsGc, MarkSweepGarbageCollector, Trace, TraceColor,
4+
cell::GcRefCell as OscarsGcRefCell,
5+
};
6+
7+
use boa_gc::{Gc as BoaGc, GcRefCell as BoaGcRefCell, force_collect as boa_force_collect};
8+
9+
fn bench_alloc(c: &mut Criterion) {
10+
let mut group = c.benchmark_group("gc_node_allocation");
11+
12+
for size in [10, 100, 1000].iter() {
13+
group.bench_with_input(BenchmarkId::new("arena2", size), size, |b, &size| {
14+
let collector = MarkSweepGarbageCollector::default()
15+
.with_arena_size(65536)
16+
.with_heap_threshold(262144);
17+
18+
b.iter(|| {
19+
let mut roots = Vec::new();
20+
for i in 0..size {
21+
let root = OscarsGc::new_in(OscarsGcRefCell::new(i), &collector);
22+
roots.push(root);
23+
}
24+
black_box(roots.len())
25+
});
26+
});
27+
28+
group.bench_with_input(BenchmarkId::new("boa_gc", size), size, |b, &size| {
29+
b.iter_batched(
30+
|| {
31+
boa_force_collect();
32+
},
33+
|()| {
34+
let mut gcs = Vec::new();
35+
for i in 0..size {
36+
let gc = BoaGc::new(BoaGcRefCell::new(i));
37+
gcs.push(gc);
38+
}
39+
black_box(gcs.len())
40+
},
41+
criterion::BatchSize::SmallInput,
42+
);
43+
});
44+
}
45+
46+
group.finish();
47+
}
48+
49+
fn bench_collection(c: &mut Criterion) {
50+
let mut group = c.benchmark_group("gc_collection_pause");
51+
52+
for size in [100, 500, 1000].iter() {
53+
group.bench_with_input(BenchmarkId::new("arena2", size), size, |b, &size| {
54+
let collector = MarkSweepGarbageCollector::default()
55+
.with_arena_size(65536)
56+
.with_heap_threshold(262144);
57+
58+
b.iter(|| {
59+
let mut roots = Vec::new();
60+
for i in 0..size {
61+
let root = OscarsGc::new_in(OscarsGcRefCell::new(i), &collector);
62+
roots.push(root);
63+
}
64+
// let half be garbage
65+
roots.truncate(size / 2);
66+
collector.collect();
67+
black_box(roots.len())
68+
});
69+
});
70+
71+
group.bench_with_input(BenchmarkId::new("boa_gc", size), size, |b, &size| {
72+
b.iter(|| {
73+
let mut gcs = Vec::new();
74+
for i in 0..size {
75+
let gc = BoaGc::new(BoaGcRefCell::new(i));
76+
gcs.push(gc);
77+
}
78+
gcs.truncate(size / 2);
79+
boa_force_collect();
80+
black_box(gcs.len())
81+
});
82+
});
83+
}
84+
85+
group.finish();
86+
}
87+
88+
fn bench_mixed(c: &mut Criterion) {
89+
let mut group = c.benchmark_group("mixed_workload");
90+
91+
group.bench_function("arena2", |b| {
92+
let collector = MarkSweepGarbageCollector::default()
93+
.with_arena_size(65536)
94+
.with_heap_threshold(131072);
95+
96+
b.iter(|| {
97+
let mut roots = Vec::new();
98+
99+
for i in 0..100 {
100+
let root = OscarsGc::new_in(OscarsGcRefCell::new(i), &collector);
101+
roots.push(root);
102+
}
103+
collector.collect();
104+
105+
for i in 100..200 {
106+
let root = OscarsGc::new_in(OscarsGcRefCell::new(i), &collector);
107+
roots.push(root);
108+
}
109+
collector.collect();
110+
111+
black_box(roots.len())
112+
});
113+
});
114+
115+
group.bench_function("boa_gc", |b| {
116+
b.iter(|| {
117+
let mut gcs = Vec::new();
118+
119+
for i in 0..100 {
120+
let gc = BoaGc::new(BoaGcRefCell::new(i));
121+
gcs.push(gc);
122+
}
123+
boa_force_collect();
124+
125+
for i in 100..200 {
126+
let gc = BoaGc::new(BoaGcRefCell::new(i));
127+
gcs.push(gc);
128+
}
129+
boa_force_collect();
130+
131+
black_box(gcs.len())
132+
});
133+
});
134+
135+
group.finish();
136+
}
137+
138+
fn bench_pressure(c: &mut Criterion) {
139+
let mut group = c.benchmark_group("memory_pressure");
140+
141+
group.bench_function("arena2", |b| {
142+
let collector = MarkSweepGarbageCollector::default()
143+
.with_arena_size(32768)
144+
.with_heap_threshold(65536);
145+
146+
b.iter(|| {
147+
let mut live = Vec::new();
148+
149+
for round in 0..10 {
150+
for i in 0..50 {
151+
let obj = OscarsGc::new_in(OscarsGcRefCell::new(round * 100 + i), &collector);
152+
if i % 10 == 0 {
153+
live.push(obj);
154+
}
155+
}
156+
collector.collect();
157+
}
158+
159+
black_box(live.len())
160+
});
161+
});
162+
163+
group.bench_function("boa_gc", |b| {
164+
b.iter(|| {
165+
let mut live = Vec::new();
166+
167+
for round in 0..10 {
168+
for i in 0..50 {
169+
let obj = BoaGc::new(BoaGcRefCell::new(round * 100 + i));
170+
if i % 10 == 0 {
171+
live.push(obj);
172+
}
173+
}
174+
boa_force_collect();
175+
}
176+
177+
black_box(live.len())
178+
});
179+
});
180+
181+
group.finish();
182+
}
183+
184+
// Register every benchmark group with Criterion and generate the `main`
// entry point (this file is built with `harness = false` in Cargo.toml).
criterion_group!(
    benches,
    bench_alloc,
    bench_collection,
    bench_mixed,
    bench_pressure,
);

criterion_main!(benches);

oscars/src/alloc/arena2/alloc.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ impl<T: ?Sized> ArenaHeapItem<T> {
4444
&mut self.value as *mut T
4545
}
4646

47-
fn value_mut(&mut self) -> &mut T {
47+
pub(crate) fn value_mut(&mut self) -> &mut T {
4848
&mut self.value
4949
}
5050
}

oscars/src/alloc/arena2/tests.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ fn arc_drop() {
7777
let heap_item_mut = heap_item.as_mut();
7878
// Manually drop the heap item
7979
heap_item_mut.mark_dropped();
80-
drop_in_place(heap_item_mut.as_ptr());
80+
drop_in_place(heap_item_mut.value_mut());
8181
};
8282

8383
assert!(dropped.load(Ordering::SeqCst));

oscars/src/collectors/mark_sweep/internals/gc_box.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,11 @@ impl<T: Trace + Finalize + ?Sized> WeakGcBox<T> {
4444
}
4545

4646
pub(crate) fn erased_inner_ptr(&self) -> NonNull<GcBox<NonTraceable>> {
47-
// SAFETY: `as_heap_ptr` returns a valid pointer to
48-
// `ArenaHeapItem` whose lifetime is tied to the arena
49-
let heap_item = unsafe { self.as_heap_ptr().as_mut() };
50-
// SAFETY: We just removed this value from a NonNull
51-
unsafe { NonNull::new_unchecked(heap_item.as_ptr()) }
47+
use crate::alloc::arena3::ArenaHeapItem;
48+
// SAFETY: `ArenaHeapItem` is `repr(transparent)`, use addr_of_mut! to avoid
49+
// creating a &mut reference during trace
50+
let raw: *mut ArenaHeapItem<GcBox<NonTraceable>> = self.as_heap_ptr().as_ptr();
51+
unsafe { NonNull::new_unchecked(core::ptr::addr_of_mut!((*raw).0)) }
5252
}
5353

5454
pub(crate) fn as_heap_ptr(&self) -> NonNull<ArenaHeapItem<GcBox<NonTraceable>>> {

oscars/src/collectors/mark_sweep/pointers/gc.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,12 @@ impl<T: Trace> Gc<T> {
4444

4545
impl<T: Trace + ?Sized> Gc<T> {
4646
pub(crate) fn as_sized_inner_ptr(&self) -> NonNull<GcBox<NonTraceable>> {
47-
let heap_item = unsafe { self.as_heap_ptr().as_mut() };
48-
unsafe { NonNull::new_unchecked(heap_item.as_ptr()) }
47+
// SAFETY: use `addr_of_mut!` to get a raw pointer without creating
48+
// a `&mut` reference, avoiding Stacked Borrows UB during GC tracing
49+
let raw: *mut ArenaHeapItem<GcBox<NonTraceable>> = self.as_heap_ptr().as_ptr();
50+
// SAFETY: `raw` is non-null because it comes from `as_heap_ptr()`
51+
// `ArenaHeapItem` is `#[repr(transparent)]` so it shares the same address as field 0
52+
unsafe { NonNull::new_unchecked(core::ptr::addr_of_mut!((*raw).0)) }
4953
}
5054

5155
pub(crate) fn as_heap_ptr(&self) -> NonNull<ArenaHeapItem<GcBox<NonTraceable>>> {

oscars/src/collectors/mark_sweep/tests.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,10 @@ mod gc_edge_cases {
445445
next: Option<Gc<Node>>,
446446
}
447447

448+
#[cfg(miri)]
449+
const DEPTH: usize = 20;
450+
451+
#[cfg(not(miri))]
448452
const DEPTH: usize = 1_000;
449453

450454
let mut head = Gc::new_in(Node { _id: 0, next: None }, collector);
@@ -613,6 +617,10 @@ mod gc_edge_cases {
613617
next: Option<Gc<Chain>>,
614618
}
615619

620+
#[cfg(miri)]
621+
const LEN: usize = 20;
622+
623+
#[cfg(not(miri))]
616624
const LEN: usize = 500;
617625

618626
let mut head = Gc::new_in(Chain { next: None }, collector);
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Mark sweep collector
2+
3+
This is a basic mark-sweep collector using an underlying arena allocator.
4+
5+
## TODO list
6+
7+
- [x] Support weak maps
8+
- [x] Add Tests
9+
10+
11+
## Areas of improvement
12+
13+
The overhead on a single allocation honestly feels a bit high. This may be worthwhile
14+
for now for performance gains and general API, but we should really measure and determine
15+
just how much overhead is being added.
16+
17+
Currently, there is a line drawn between the allocator and the GcBox. This creates very,
18+
very awkward naming (ArenaPointer, ArenaHeapItem, GcBox, etc.). We may be able to combine
19+
the general functionality of the ArenaHeapItem, and GcBox. But also, that would then
20+
restrict the potential ability to switch out allocators as easily ... to be determined.
21+

0 commit comments

Comments
 (0)