From c71a0ea9a51a90bfad5f1c53192e3736742617f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20B=C3=B6ving?= Date: Fri, 27 Mar 2026 11:13:04 +0100 Subject: [PATCH] perf: coalescing of RC ops within one basic block (#13136) This PR introduces coalescing of RC operations to the RC optimizer. Whenever we perform multiple `inc`s for a single value within one basic block it is legal to instead perform all of these `inc`s at once at the first `inc` side. This is the case because the value will stay alive until at least the last `inc` and was thus never observable with `RC=1`. Hence, this change of `inc` location never destroys reuse opportunities. --- src/Lean/Compiler/LCNF/CoalesceRC.lean | 104 +++++++++++++++++++++++++ src/Lean/Compiler/LCNF/Passes.lean | 2 + tests/elab/compile_coalesce_rc.lean | 41 ++++++++++ 3 files changed, 147 insertions(+) create mode 100644 src/Lean/Compiler/LCNF/CoalesceRC.lean create mode 100644 tests/elab/compile_coalesce_rc.lean diff --git a/src/Lean/Compiler/LCNF/CoalesceRC.lean b/src/Lean/Compiler/LCNF/CoalesceRC.lean new file mode 100644 index 0000000000..e16da0baca --- /dev/null +++ b/src/Lean/Compiler/LCNF/CoalesceRC.lean @@ -0,0 +1,104 @@ +/- +Copyright (c) 2026 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Henrik Böving +-/ +module + +prelude +public import Lean.Compiler.LCNF.CompilerM +public import Lean.Compiler.LCNF.PassManager + +namespace Lean.Compiler.LCNF + +/-! +# Coalesce Reference Counting Operations + +This pass coalesces multiple `inc`/`dec` operations on the same variable within a basic block. +Within a basic block, it is always safe to: +- Move all increments on a variable to the first `inc` location (summing the counts). Because if + there are later `inc`s no intermediate operation can observe RC=1 (as the value must stay alive + until the later inc) and thus doing all relevant `inc` in the beginning doesn't change + semantics. +- Move all decrements on a variable to the last `dec` location (summing the counts). Because the + value is guaranteed to stay alive until at least the last `dec` anyway so a similiar argument to + `inc` holds. + +Crucially this pass must be placed after `expandResetReuse` as that one relies on `inc`s still being +present in their original location for optimization purposes. +-/ + +private structure State where + /-- Total inc count per variable in the current basic block (accumulated going forward). -/ + incTotal : Std.HashMap FVarId Nat := {} + /-- Total dec count per variable in the current basic block (accumulated going forward). -/ + decTotal : Std.HashMap FVarId Nat := {} + /-- + Inc count seen so far per variable going backward. When this equals `incTotal`, we've + reached the first inc and should emit the coalesced operation. + -/ + incAccum : Std.HashMap FVarId Nat := {} + /-- + Whether we've already emitted the coalesced dec for a variable (going backward, the first + dec encountered is the last in the block). + -/ + decPlaced : Std.HashSet FVarId := {} + +private abbrev M := StateRefT State CompilerM + +/-- +Coalesce inc/dec operations within individual basic blocks. +-/ +partial def Code.coalesceRC (code : Code .impure) : CompilerM (Code .impure) := do + go code |>.run' {} +where + go (code : Code .impure) : M (Code .impure) := do + match code with + | .inc fvarId n check persistent k _ => + modify fun s => { s with incTotal := s.incTotal.alter fvarId (fun v? => some ((v?.getD 0) + n)) } + let k ← go k + modify fun s => { s with incAccum := s.incAccum.alter fvarId (fun v? => some ((v?.getD 0) + n)) } + let s ← get + if s.incAccum[fvarId]! == s.incTotal[fvarId]! then + return .inc fvarId s.incTotal[fvarId]! check persistent k + else + return k + | .dec fvarId n check persistent k _ => + modify fun s => { s with decTotal := s.decTotal.alter fvarId (fun v? => some ((v?.getD 0) + n)) } + let k ← go k + let s ← get + if !s.decPlaced.contains fvarId then + modify fun s => { s with decPlaced := s.decPlaced.insert fvarId } + return .dec fvarId s.decTotal[fvarId]! check persistent k + else + return k + | .let _ k => + let k ← go k + return code.updateCont! k + | .jp decl k => + let value ← decl.value.coalesceRC + let decl ← decl.updateValue value + let k ← go k + return code.updateFun! decl k + | .cases c => + let alts ← c.alts.mapMonoM (·.mapCodeM (·.coalesceRC)) + return code.updateAlts! alts + | .del _ k _ => + let k ← go k + return code.updateCont! k + | .oset (k := k) .. | .uset (k := k) .. | .sset (k := k) .. | .setTag (k := k) .. => + let k ← go k + return code.updateCont! k + | .return .. | .jmp .. | .unreach .. => return code + +def Decl.coalesceRC (decl : Decl .impure) : CompilerM (Decl .impure) := do + let value ← decl.value.mapCodeM Code.coalesceRC + return { decl with value } + +public def coalesceRC : Pass := + .mkPerDeclaration `coalesceRc .impure Decl.coalesceRC + +builtin_initialize + registerTraceClass `Compiler.coalesceRc (inherited := true) + +end Lean.Compiler.LCNF diff --git a/src/Lean/Compiler/LCNF/Passes.lean b/src/Lean/Compiler/LCNF/Passes.lean index e4f65b7643..1c96fe21fa 100644 --- a/src/Lean/Compiler/LCNF/Passes.lean +++ b/src/Lean/Compiler/LCNF/Passes.lean @@ -26,6 +26,7 @@ public import Lean.Compiler.LCNF.SimpCase public import Lean.Compiler.LCNF.InferBorrow public import Lean.Compiler.LCNF.ExplicitBoxing public import Lean.Compiler.LCNF.ExplicitRC +public import Lean.Compiler.LCNF.CoalesceRC public import Lean.Compiler.LCNF.Toposort public import Lean.Compiler.LCNF.ExpandResetReuse public import Lean.Compiler.LCNF.SimpleGroundExpr @@ -149,6 +150,7 @@ def builtinPassManager : PassManager := { explicitBoxing, explicitRc, expandResetReuse, + coalesceRC, pushProj (occurrence := 1), detectSimpleGround, inferVisibility (phase := .impure), diff --git a/tests/elab/compile_coalesce_rc.lean b/tests/elab/compile_coalesce_rc.lean new file mode 100644 index 0000000000..c9aed65425 --- /dev/null +++ b/tests/elab/compile_coalesce_rc.lean @@ -0,0 +1,41 @@ +module + +/-! This test demonstrates basic coalescing capabilities of the RC optimizer -/ + +public section + +@[extern "foo"] +opaque foo (x : String) (n : Nat) : Nat + + +/-- +trace: [Compiler.coalesceRc] size: 18 + def test x : tobj := + let _x.1 := 0; + inc[3][ref] x; + let a := foo x _x.1; + let _x.2 := 1; + let _x.3 := foo x _x.2; + let a := Nat.add a _x.3; + dec _x.3; + dec a; + let _x.4 := 2; + let _x.5 := foo x _x.4; + let a := Nat.add a _x.5; + dec _x.5; + dec a; + let _x.6 := 3; + let _x.7 := foo x _x.6; + let a := Nat.add a _x.7; + dec _x.7; + dec a; + return a +-/ +#guard_msgs in +set_option trace.Compiler.coalesceRc true in +def test (x : String) : Nat := + let a := foo x 0 + let a := a + foo x 1 + let a := a + foo x 2 + let a := a + foo x 3 + a