perf: optimize lean_dec_ref_cold (#13669)

This PR optimizes `lean_dec_ref_cold` by outlining the "freezing cold"
path and performing a small microarchitecural optimization. The latter
is better as it makes clear to LLVM that we believe the pointer to only
use 48 bits.
This commit is contained in:
Henrik Böving 2026-05-07 10:18:39 +02:00 committed by GitHub
parent 5d5642107d
commit 355dca6f57
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -293,11 +293,10 @@ static inline lean_object * get_next(lean_object * o) {
static inline void set_next(lean_object * o, lean_object * n) { static inline void set_next(lean_object * o, lean_object * n) {
if (sizeof(void*) == 8) { if (sizeof(void*) == 8) {
size_t new_header = (size_t)n; uint16_t hi;
LEAN_BYTE(new_header, 7) = o->m_tag; memcpy(&hi, (char*)o + 6, 2);
LEAN_BYTE(new_header, 6) = o->m_other; size_t header = ((size_t)hi << 48) | (size_t)n;
((size_t*)o)[0] = new_header; memcpy(o, &header, 8);
lean_assert(get_next(o) == n);
} else { } else {
// 32-bit version // 32-bit version
((lean_object**)o)[0] = n; ((lean_object**)o)[0] = n;
@ -361,61 +360,65 @@ extern "C" LEAN_EXPORT lean_object * lean_alloc_object(size_t sz) {
static void deactivate_task(lean_task_object * t); static void deactivate_task(lean_task_object * t);
static void deactivate_promise(lean_promise_object * t); static void deactivate_promise(lean_promise_object * t);
static void lean_del_core_other(object * o, uint8 tag, object * & todo) {
switch (tag) {
case LeanClosure: {
object ** it = lean_closure_arg_cptr(o);
object ** end = it + lean_closure_num_fixed(o);
for (; it != end; ++it) dec(*it, todo);
lean_dealloc(o, lean_closure_byte_size(o));
break;
}
case LeanArray: {
object ** it = lean_array_cptr(o);
object ** end = it + lean_array_size(o);
for (; it != end; ++it) dec(*it, todo);
lean_dealloc(o, lean_array_byte_size(o));
break;
}
case LeanScalarArray:
lean_dealloc(o, lean_sarray_byte_size(o));
break;
case LeanString:
lean_dealloc(o, lean_string_byte_size(o));
break;
case LeanMPZ:
to_mpz(o)->m_value.~mpz();
lean_free_small_object(o);
break;
case LeanThunk:
if (object * c = lean_to_thunk(o)->m_closure) dec(c, todo);
if (object * v = lean_to_thunk(o)->m_value) dec(v, todo);
lean_free_small_object(o);
break;
case LeanRef:
if (object * v = lean_to_ref(o)->m_value) dec(v, todo);
lean_free_small_object(o);
break;
case LeanTask:
deactivate_task(lean_to_task(o));
break;
case LeanPromise:
deactivate_promise(lean_to_promise(o));
break;
case LeanExternal:
lean_to_external(o)->m_class->m_finalize(lean_to_external(o)->m_data);
lean_free_small_object(o);
break;
default:
lean_unreachable();
}
}
static void lean_del_core(object * o, object * & todo) { static void lean_del_core(object * o, object * & todo) {
uint8 tag = lean_ptr_tag(o); uint8 tag = lean_ptr_tag(o);
if (tag <= LeanMaxCtorTag) { if (LEAN_LIKELY(tag <= LeanMaxCtorTag)) {
object ** it = lean_ctor_obj_cptr(o); object ** it = lean_ctor_obj_cptr(o);
object ** end = it + lean_ctor_num_objs(o); object ** end = it + lean_ctor_num_objs(o);
for (; it != end; ++it) dec(*it, todo); for (; it != end; ++it) dec(*it, todo);
lean_free_small_object(o); lean_free_small_object(o);
} else { } else {
switch (tag) { lean_del_core_other(o, tag, todo);
case LeanClosure: {
object ** it = lean_closure_arg_cptr(o);
object ** end = it + lean_closure_num_fixed(o);
for (; it != end; ++it) dec(*it, todo);
lean_dealloc(o, lean_closure_byte_size(o));
break;
}
case LeanArray: {
object ** it = lean_array_cptr(o);
object ** end = it + lean_array_size(o);
for (; it != end; ++it) dec(*it, todo);
lean_dealloc(o, lean_array_byte_size(o));
break;
}
case LeanScalarArray:
lean_dealloc(o, lean_sarray_byte_size(o));
break;
case LeanString:
lean_dealloc(o, lean_string_byte_size(o));
break;
case LeanMPZ:
to_mpz(o)->m_value.~mpz();
lean_free_small_object(o);
break;
case LeanThunk:
if (object * c = lean_to_thunk(o)->m_closure) dec(c, todo);
if (object * v = lean_to_thunk(o)->m_value) dec(v, todo);
lean_free_small_object(o);
break;
case LeanRef:
if (object * v = lean_to_ref(o)->m_value) dec(v, todo);
lean_free_small_object(o);
break;
case LeanTask:
deactivate_task(lean_to_task(o));
break;
case LeanPromise:
deactivate_promise(lean_to_promise(o));
break;
case LeanExternal:
lean_to_external(o)->m_class->m_finalize(lean_to_external(o)->m_data);
lean_free_small_object(o);
break;
default:
lean_unreachable();
}
} }
} }