perf: coalescing of RC ops within one basic block (#13136)
This PR introduces coalescing of RC operations to the RC optimizer. Whenever we perform multiple `inc`s for a single value within one basic block it is legal to instead perform all of these `inc`s at once at the first `inc` side. This is the case because the value will stay alive until at least the last `inc` and was thus never observable with `RC=1`. Hence, this change of `inc` location never destroys reuse opportunities.
This commit is contained in:
parent
439c3c5544
commit
c71a0ea9a5
3 changed files with 147 additions and 0 deletions
104
src/Lean/Compiler/LCNF/CoalesceRC.lean
Normal file
104
src/Lean/Compiler/LCNF/CoalesceRC.lean
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
/-
|
||||
Copyright (c) 2026 Lean FRO, LLC. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Authors: Henrik Böving
|
||||
-/
|
||||
module
|
||||
|
||||
prelude
|
||||
public import Lean.Compiler.LCNF.CompilerM
|
||||
public import Lean.Compiler.LCNF.PassManager
|
||||
|
||||
namespace Lean.Compiler.LCNF
|
||||
|
||||
/-!
|
||||
# Coalesce Reference Counting Operations
|
||||
|
||||
This pass coalesces multiple `inc`/`dec` operations on the same variable within a basic block.
|
||||
Within a basic block, it is always safe to:
|
||||
- Move all increments on a variable to the first `inc` location (summing the counts). Because if
|
||||
there are later `inc`s no intermediate operation can observe RC=1 (as the value must stay alive
|
||||
until the later inc) and thus doing all relevant `inc` in the beginning doesn't change
|
||||
semantics.
|
||||
- Move all decrements on a variable to the last `dec` location (summing the counts). Because the
|
||||
value is guaranteed to stay alive until at least the last `dec` anyway so a similiar argument to
|
||||
`inc` holds.
|
||||
|
||||
Crucially this pass must be placed after `expandResetReuse` as that one relies on `inc`s still being
|
||||
present in their original location for optimization purposes.
|
||||
-/
|
||||
|
||||
private structure State where
|
||||
/-- Total inc count per variable in the current basic block (accumulated going forward). -/
|
||||
incTotal : Std.HashMap FVarId Nat := {}
|
||||
/-- Total dec count per variable in the current basic block (accumulated going forward). -/
|
||||
decTotal : Std.HashMap FVarId Nat := {}
|
||||
/--
|
||||
Inc count seen so far per variable going backward. When this equals `incTotal`, we've
|
||||
reached the first inc and should emit the coalesced operation.
|
||||
-/
|
||||
incAccum : Std.HashMap FVarId Nat := {}
|
||||
/--
|
||||
Whether we've already emitted the coalesced dec for a variable (going backward, the first
|
||||
dec encountered is the last in the block).
|
||||
-/
|
||||
decPlaced : Std.HashSet FVarId := {}
|
||||
|
||||
private abbrev M := StateRefT State CompilerM
|
||||
|
||||
/--
|
||||
Coalesce inc/dec operations within individual basic blocks.
|
||||
-/
|
||||
partial def Code.coalesceRC (code : Code .impure) : CompilerM (Code .impure) := do
|
||||
go code |>.run' {}
|
||||
where
|
||||
go (code : Code .impure) : M (Code .impure) := do
|
||||
match code with
|
||||
| .inc fvarId n check persistent k _ =>
|
||||
modify fun s => { s with incTotal := s.incTotal.alter fvarId (fun v? => some ((v?.getD 0) + n)) }
|
||||
let k ← go k
|
||||
modify fun s => { s with incAccum := s.incAccum.alter fvarId (fun v? => some ((v?.getD 0) + n)) }
|
||||
let s ← get
|
||||
if s.incAccum[fvarId]! == s.incTotal[fvarId]! then
|
||||
return .inc fvarId s.incTotal[fvarId]! check persistent k
|
||||
else
|
||||
return k
|
||||
| .dec fvarId n check persistent k _ =>
|
||||
modify fun s => { s with decTotal := s.decTotal.alter fvarId (fun v? => some ((v?.getD 0) + n)) }
|
||||
let k ← go k
|
||||
let s ← get
|
||||
if !s.decPlaced.contains fvarId then
|
||||
modify fun s => { s with decPlaced := s.decPlaced.insert fvarId }
|
||||
return .dec fvarId s.decTotal[fvarId]! check persistent k
|
||||
else
|
||||
return k
|
||||
| .let _ k =>
|
||||
let k ← go k
|
||||
return code.updateCont! k
|
||||
| .jp decl k =>
|
||||
let value ← decl.value.coalesceRC
|
||||
let decl ← decl.updateValue value
|
||||
let k ← go k
|
||||
return code.updateFun! decl k
|
||||
| .cases c =>
|
||||
let alts ← c.alts.mapMonoM (·.mapCodeM (·.coalesceRC))
|
||||
return code.updateAlts! alts
|
||||
| .del _ k _ =>
|
||||
let k ← go k
|
||||
return code.updateCont! k
|
||||
| .oset (k := k) .. | .uset (k := k) .. | .sset (k := k) .. | .setTag (k := k) .. =>
|
||||
let k ← go k
|
||||
return code.updateCont! k
|
||||
| .return .. | .jmp .. | .unreach .. => return code
|
||||
|
||||
def Decl.coalesceRC (decl : Decl .impure) : CompilerM (Decl .impure) := do
|
||||
let value ← decl.value.mapCodeM Code.coalesceRC
|
||||
return { decl with value }
|
||||
|
||||
public def coalesceRC : Pass :=
|
||||
.mkPerDeclaration `coalesceRc .impure Decl.coalesceRC
|
||||
|
||||
builtin_initialize
|
||||
registerTraceClass `Compiler.coalesceRc (inherited := true)
|
||||
|
||||
end Lean.Compiler.LCNF
|
||||
|
|
@ -26,6 +26,7 @@ public import Lean.Compiler.LCNF.SimpCase
|
|||
public import Lean.Compiler.LCNF.InferBorrow
|
||||
public import Lean.Compiler.LCNF.ExplicitBoxing
|
||||
public import Lean.Compiler.LCNF.ExplicitRC
|
||||
public import Lean.Compiler.LCNF.CoalesceRC
|
||||
public import Lean.Compiler.LCNF.Toposort
|
||||
public import Lean.Compiler.LCNF.ExpandResetReuse
|
||||
public import Lean.Compiler.LCNF.SimpleGroundExpr
|
||||
|
|
@ -149,6 +150,7 @@ def builtinPassManager : PassManager := {
|
|||
explicitBoxing,
|
||||
explicitRc,
|
||||
expandResetReuse,
|
||||
coalesceRC,
|
||||
pushProj (occurrence := 1),
|
||||
detectSimpleGround,
|
||||
inferVisibility (phase := .impure),
|
||||
|
|
|
|||
41
tests/elab/compile_coalesce_rc.lean
Normal file
41
tests/elab/compile_coalesce_rc.lean
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
module
|
||||
|
||||
/-! This test demonstrates basic coalescing capabilities of the RC optimizer -/
|
||||
|
||||
public section
|
||||
|
||||
@[extern "foo"]
|
||||
opaque foo (x : String) (n : Nat) : Nat
|
||||
|
||||
|
||||
/--
|
||||
trace: [Compiler.coalesceRc] size: 18
|
||||
def test x : tobj :=
|
||||
let _x.1 := 0;
|
||||
inc[3][ref] x;
|
||||
let a := foo x _x.1;
|
||||
let _x.2 := 1;
|
||||
let _x.3 := foo x _x.2;
|
||||
let a := Nat.add a _x.3;
|
||||
dec _x.3;
|
||||
dec a;
|
||||
let _x.4 := 2;
|
||||
let _x.5 := foo x _x.4;
|
||||
let a := Nat.add a _x.5;
|
||||
dec _x.5;
|
||||
dec a;
|
||||
let _x.6 := 3;
|
||||
let _x.7 := foo x _x.6;
|
||||
let a := Nat.add a _x.7;
|
||||
dec _x.7;
|
||||
dec a;
|
||||
return a
|
||||
-/
|
||||
#guard_msgs in
|
||||
set_option trace.Compiler.coalesceRc true in
|
||||
def test (x : String) : Nat :=
|
||||
let a := foo x 0
|
||||
let a := a + foo x 1
|
||||
let a := a + foo x 2
|
||||
let a := a + foo x 3
|
||||
a
|
||||
Loading…
Add table
Reference in a new issue