perf: optimize lean_dec_ref_cold (#13669)
This PR optimizes `lean_dec_ref_cold` by outlining the "freezing cold" path and performing a small microarchitecural optimization. The latter is better as it makes clear to LLVM that we believe the pointer to only use 48 bits.
This commit is contained in:
parent
5d5642107d
commit
355dca6f57
1 changed files with 56 additions and 53 deletions
|
|
@ -293,11 +293,10 @@ static inline lean_object * get_next(lean_object * o) {
|
||||||
|
|
||||||
static inline void set_next(lean_object * o, lean_object * n) {
|
static inline void set_next(lean_object * o, lean_object * n) {
|
||||||
if (sizeof(void*) == 8) {
|
if (sizeof(void*) == 8) {
|
||||||
size_t new_header = (size_t)n;
|
uint16_t hi;
|
||||||
LEAN_BYTE(new_header, 7) = o->m_tag;
|
memcpy(&hi, (char*)o + 6, 2);
|
||||||
LEAN_BYTE(new_header, 6) = o->m_other;
|
size_t header = ((size_t)hi << 48) | (size_t)n;
|
||||||
((size_t*)o)[0] = new_header;
|
memcpy(o, &header, 8);
|
||||||
lean_assert(get_next(o) == n);
|
|
||||||
} else {
|
} else {
|
||||||
// 32-bit version
|
// 32-bit version
|
||||||
((lean_object**)o)[0] = n;
|
((lean_object**)o)[0] = n;
|
||||||
|
|
@ -361,61 +360,65 @@ extern "C" LEAN_EXPORT lean_object * lean_alloc_object(size_t sz) {
|
||||||
static void deactivate_task(lean_task_object * t);
|
static void deactivate_task(lean_task_object * t);
|
||||||
static void deactivate_promise(lean_promise_object * t);
|
static void deactivate_promise(lean_promise_object * t);
|
||||||
|
|
||||||
|
static void lean_del_core_other(object * o, uint8 tag, object * & todo) {
|
||||||
|
switch (tag) {
|
||||||
|
case LeanClosure: {
|
||||||
|
object ** it = lean_closure_arg_cptr(o);
|
||||||
|
object ** end = it + lean_closure_num_fixed(o);
|
||||||
|
for (; it != end; ++it) dec(*it, todo);
|
||||||
|
lean_dealloc(o, lean_closure_byte_size(o));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case LeanArray: {
|
||||||
|
object ** it = lean_array_cptr(o);
|
||||||
|
object ** end = it + lean_array_size(o);
|
||||||
|
for (; it != end; ++it) dec(*it, todo);
|
||||||
|
lean_dealloc(o, lean_array_byte_size(o));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case LeanScalarArray:
|
||||||
|
lean_dealloc(o, lean_sarray_byte_size(o));
|
||||||
|
break;
|
||||||
|
case LeanString:
|
||||||
|
lean_dealloc(o, lean_string_byte_size(o));
|
||||||
|
break;
|
||||||
|
case LeanMPZ:
|
||||||
|
to_mpz(o)->m_value.~mpz();
|
||||||
|
lean_free_small_object(o);
|
||||||
|
break;
|
||||||
|
case LeanThunk:
|
||||||
|
if (object * c = lean_to_thunk(o)->m_closure) dec(c, todo);
|
||||||
|
if (object * v = lean_to_thunk(o)->m_value) dec(v, todo);
|
||||||
|
lean_free_small_object(o);
|
||||||
|
break;
|
||||||
|
case LeanRef:
|
||||||
|
if (object * v = lean_to_ref(o)->m_value) dec(v, todo);
|
||||||
|
lean_free_small_object(o);
|
||||||
|
break;
|
||||||
|
case LeanTask:
|
||||||
|
deactivate_task(lean_to_task(o));
|
||||||
|
break;
|
||||||
|
case LeanPromise:
|
||||||
|
deactivate_promise(lean_to_promise(o));
|
||||||
|
break;
|
||||||
|
case LeanExternal:
|
||||||
|
lean_to_external(o)->m_class->m_finalize(lean_to_external(o)->m_data);
|
||||||
|
lean_free_small_object(o);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
lean_unreachable();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void lean_del_core(object * o, object * & todo) {
|
static void lean_del_core(object * o, object * & todo) {
|
||||||
uint8 tag = lean_ptr_tag(o);
|
uint8 tag = lean_ptr_tag(o);
|
||||||
if (tag <= LeanMaxCtorTag) {
|
if (LEAN_LIKELY(tag <= LeanMaxCtorTag)) {
|
||||||
object ** it = lean_ctor_obj_cptr(o);
|
object ** it = lean_ctor_obj_cptr(o);
|
||||||
object ** end = it + lean_ctor_num_objs(o);
|
object ** end = it + lean_ctor_num_objs(o);
|
||||||
for (; it != end; ++it) dec(*it, todo);
|
for (; it != end; ++it) dec(*it, todo);
|
||||||
lean_free_small_object(o);
|
lean_free_small_object(o);
|
||||||
} else {
|
} else {
|
||||||
switch (tag) {
|
lean_del_core_other(o, tag, todo);
|
||||||
case LeanClosure: {
|
|
||||||
object ** it = lean_closure_arg_cptr(o);
|
|
||||||
object ** end = it + lean_closure_num_fixed(o);
|
|
||||||
for (; it != end; ++it) dec(*it, todo);
|
|
||||||
lean_dealloc(o, lean_closure_byte_size(o));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case LeanArray: {
|
|
||||||
object ** it = lean_array_cptr(o);
|
|
||||||
object ** end = it + lean_array_size(o);
|
|
||||||
for (; it != end; ++it) dec(*it, todo);
|
|
||||||
lean_dealloc(o, lean_array_byte_size(o));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case LeanScalarArray:
|
|
||||||
lean_dealloc(o, lean_sarray_byte_size(o));
|
|
||||||
break;
|
|
||||||
case LeanString:
|
|
||||||
lean_dealloc(o, lean_string_byte_size(o));
|
|
||||||
break;
|
|
||||||
case LeanMPZ:
|
|
||||||
to_mpz(o)->m_value.~mpz();
|
|
||||||
lean_free_small_object(o);
|
|
||||||
break;
|
|
||||||
case LeanThunk:
|
|
||||||
if (object * c = lean_to_thunk(o)->m_closure) dec(c, todo);
|
|
||||||
if (object * v = lean_to_thunk(o)->m_value) dec(v, todo);
|
|
||||||
lean_free_small_object(o);
|
|
||||||
break;
|
|
||||||
case LeanRef:
|
|
||||||
if (object * v = lean_to_ref(o)->m_value) dec(v, todo);
|
|
||||||
lean_free_small_object(o);
|
|
||||||
break;
|
|
||||||
case LeanTask:
|
|
||||||
deactivate_task(lean_to_task(o));
|
|
||||||
break;
|
|
||||||
case LeanPromise:
|
|
||||||
deactivate_promise(lean_to_promise(o));
|
|
||||||
break;
|
|
||||||
case LeanExternal:
|
|
||||||
lean_to_external(o)->m_class->m_finalize(lean_to_external(o)->m_data);
|
|
||||||
lean_free_small_object(o);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
lean_unreachable();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue