fix: make name mangling unambiguous (#10727)

This PR fixes name mangling to be unambiguous / injective by adding `00`
for disambiguation where necessary. Additionally, the inverse function,
`Lean.Name.unmangle` has been added which can be used to unmangle a
mangled identifier. This unmangler has been added to demonstrate the
injectivity but also to allow unmangling identifiers e.g. for debugging
purposes.

Closes #10724
This commit is contained in:
Rob23oba 2025-10-23 09:18:07 +02:00 committed by GitHub
parent cf22c367a1
commit fad0e69cc7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 336 additions and 89 deletions

View file

@ -2077,9 +2077,14 @@ theorem Slice.Pos.prevAux_lt_self {s : Slice} {p : s.Pos} {h} : p.prevAux h < p.
theorem Slice.Pos.prevAux_lt_rawEndPos {s : Slice} {p : s.Pos} {h} : p.prevAux h < s.rawEndPos :=
Pos.Raw.lt_of_lt_of_le prevAux_lt_self p.isValidForSlice.le_utf8ByteSize
@[simp]
theorem Slice.Pos.prev_ne_endPos {s : Slice} {p : s.Pos} {h} : p.prev h ≠ s.endPos := by
simpa [Pos.ext_iff, prev] using Pos.Raw.ne_of_lt prevAux_lt_rawEndPos
@[simp]
theorem ValidPos.prev_ne_endValidPos {s : String} {p : s.ValidPos} {h} : p.prev h ≠ s.endValidPos :=
mt (congrArg (·.toSlice)) (Slice.Pos.prev_ne_endPos (h := mt (congrArg (·.ofSlice)) h))
theorem Slice.Pos.offset_prev_lt_offset {s : Slice} {p : s.Pos} {h} : (p.prev h).offset < p.offset := by
simpa [prev] using prevAux_lt_self

View file

@ -1,72 +1,256 @@
/-
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
Author: Leonardo de Moura, Robin Arnez
-/
module
prelude
public import Lean.Data.Name
import Init.Data.String.Iterator
public section
public import Init.Prelude
import Init.Data.String.Basic
namespace String
private def mangleAux : Nat → String.Iterator → String → String
| 0, _, r => r
| i+1, it, r =>
let c := it.curr
if c.isAlpha || c.isDigit then
mangleAux i it.next (r.push c)
else if c = '_' then
mangleAux i it.next (r ++ "__")
else if c.toNat < 0x100 then
let n := c.toNat
let r := r ++ "_x"
let r := r.push <| Nat.digitChar (n / 0x10)
let r := r.push <| Nat.digitChar (n % 0x10)
mangleAux i it.next r
else if c.toNat < 0x10000 then
let n := c.toNat
let r := r ++ "_u"
let r := r.push <| Nat.digitChar (n / 0x1000)
let n := n % 0x1000
let r := r.push <| Nat.digitChar (n / 0x100)
let n := n % 0x100
let r := r.push <| Nat.digitChar (n / 0x10)
let r := r.push <| Nat.digitChar (n % 0x10)
mangleAux i it.next r
else
let n := c.toNat
let r := r ++ "_U"
let ds := Nat.toDigits 16 n
let r := Nat.repeat (·.push '0') (8 - ds.length) r
let r := ds.foldl (fun r c => r.push c) r
mangleAux i it.next r
def digitChar (n : UInt32) (h : n < 16) : Char :=
if h' : n < 10 then ⟨n + 48, ?_⟩
else ⟨n + 87, ?_⟩
where finally all_goals
simp_all [UInt32.lt_iff_toNat_lt, UInt32.isValidChar, Nat.isValidChar]; omega
def mangle (s : String) : String :=
mangleAux s.length s.mkIterator ""
def pushHex (n : Nat) (val : UInt32) (s : String) : String :=
match n with
| 0 => s
| k + 1 =>
let i := (val >>> (4 * k.toUInt32)) &&& 15
pushHex k val (s.push (digitChar i ?_))
where finally
have := Nat.and_two_pow_sub_one_eq_mod (n := 4)
simp only [Nat.reducePow, Nat.add_one_sub_one] at this
simp [i, UInt32.lt_iff_toNat_lt, this]; omega
def ValidPos.remainingBytes (pos : String.ValidPos s) : Nat :=
s.utf8ByteSize - pos.offset.byteIdx
theorem ValidPos.remainingBytes_next_lt_of_lt {p p' : String.ValidPos s} (h' : p' < p) :
p.remainingBytes < p'.remainingBytes := by
simp only [ValidPos.lt_iff, Pos.Raw.lt_iff] at h' ⊢
simp only [remainingBytes]
have : p.offset.byteIdx ≤ s.utf8ByteSize := p.isValid.le_rawEndPos
omega
theorem ValidPos.lt_next {p : String.ValidPos s} (h) : p < p.next h := by
simp only [next, lt_iff, Slice.Pos.offset_ofSlice, Pos.Raw.lt_iff, Slice.Pos.byteIdx_offset_next,
offset_toSlice, Nat.lt_add_right_iff_pos]
exact Char.utf8Size_pos _
theorem ValidPos.remainingBytes_next_lt (pos : String.ValidPos s) (h) :
(pos.next h).remainingBytes < pos.remainingBytes :=
remainingBytes_next_lt_of_lt (pos.lt_next h)
def mangleAux (s : String) (pos : s.ValidPos) (r : String) : String :=
if h : pos = s.endValidPos then r else
let c := pos.get h
let pos := pos.next h
if c.isAlpha || c.isDigit then
mangleAux s pos (r.push c)
else if c = '_' then
mangleAux s pos (r ++ "__")
else if c.toNat < 0x100 then
mangleAux s pos (pushHex 2 c.val (r ++ "_x"))
else if c.toNat < 0x10000 then
mangleAux s pos (pushHex 4 c.val (r ++ "_u"))
else
mangleAux s pos (pushHex 8 c.val (r ++ "_U"))
termination_by pos.remainingBytes
decreasing_by all_goals apply ValidPos.remainingBytes_next_lt
public def mangle (s : String) : String :=
mangleAux s s.startValidPos ""
end String
namespace Lean
private def Name.mangleAux : Name → String
def checkLowerHex : Nat → (s : String) → s.ValidPos → Bool
| 0, _, _ => true
| k + 1, s, pos =>
if h : pos = s.endValidPos then
false
else
let ch := pos.get h
(ch.isDigit || (ch.val >= 97 && ch.val <= 102)) && -- 0-9a-f
checkLowerHex k s (pos.next h)
theorem valid_of_checkLowerHex (h : checkLowerHex n s p) :
(String.Pos.Raw.mk (p.offset.byteIdx + n)).IsValid s := by
fun_induction checkLowerHex
· rename_i p
exact p.isValid
· contradiction
· rename_i k s p hp ch ih
simp only [Bool.and_eq_true, Bool.or_eq_true, decide_eq_true_eq] at h
specialize ih h.2
refine cast ?_ ih
congr 2
simp only [String.ValidPos.next, String.Slice.Pos.offset_ofSlice,
String.Slice.Pos.byteIdx_offset_next, String.ValidPos.offset_toSlice, Nat.succ_eq_add_one]
change p.offset.byteIdx + ch.utf8Size + k = _
rw [Char.utf8Size_eq_one_iff.mpr, Nat.add_assoc, Nat.add_comm 1]
rcases h.1 with h | h
· simp only [Char.isDigit, Bool.and_eq_true, decide_eq_true_eq] at h
exact UInt32.le_trans h.2 (by decide)
· exact UInt32.le_trans h.2 (by decide)
def parseLowerHex : (n : Nat) → (s : String) → (p : s.ValidPos) →
checkLowerHex n s p → Nat → Nat
| 0, _, _, _, n => n
| k + 1, s, pos, h, n =>
have hpos : pos ≠ s.endValidPos := by
rw [checkLowerHex] at h
split at h <;> trivial
let ch := pos.get hpos
let pos := pos.next hpos
have h' : checkLowerHex k s pos := by
simp only [checkLowerHex, hpos, ↓reduceDIte, ge_iff_le, Bool.and_eq_true, Bool.or_eq_true,
decide_eq_true_eq, pos] at h ⊢
exact h.2
if ch.isDigit then parseLowerHex k s pos h' (n <<< 4 ||| (ch.val - 48).toNat)
else parseLowerHex k s pos h' (n <<< 4 ||| (ch.val - 87).toNat)
def checkDisambiguation (s : String) (p : s.ValidPos) : Bool :=
if h : _ then
let b := p.get h
if b = '_' then
checkDisambiguation s (p.next h)
else if b = 'x' then
checkLowerHex 2 s (p.next h)
else if b = 'u' then
checkLowerHex 4 s (p.next h)
else if b = 'U' then
checkLowerHex 8 s (p.next h)
else if b.val >= 48 && b.val <= 57 then
true
else false
else true
termination_by p.remainingBytes
decreasing_by apply p.remainingBytes_next_lt
def needDisambiguation (prev : Name) (next : String) : Bool :=
(match prev with
| .str _ s => ∃ h, (s.endValidPos.prev h).get (by simp) = '_'
| _ => false) || checkDisambiguation next next.startValidPos
def Name.mangleAux : Name → String
| Name.anonymous => ""
| Name.str p s =>
let m := String.mangle s
match p with
| Name.anonymous => m
| p => mangleAux p ++ "_" ++ m
| Name.num p n => mangleAux p ++ "_" ++ toString n ++ "_"
| Name.anonymous =>
if checkDisambiguation m m.startValidPos then "00" ++ m else m
| p =>
let m1 := mangleAux p
m1 ++ (if needDisambiguation p m then "_00" else "_") ++ m
| Name.num p n =>
match p with
| Name.anonymous => n.repr ++ "_"
| p =>
mangleAux p ++ "_" ++ n.repr ++ "_"
@[export lean_name_mangle]
def Name.mangle (n : Name) (pre : String := "l_") : String :=
public def Name.mangle (n : Name) (pre : String := "l_") : String :=
pre ++ Name.mangleAux n
@[export lean_mk_module_initialization_function_name]
def mkModuleInitializationFunctionName (moduleName : Name) : String :=
public def mkModuleInitializationFunctionName (moduleName : Name) : String :=
"initialize_" ++ moduleName.mangle ""
-- assumes `s` has been generated `Name.mangle n ""`
def Name.demangleAux (s : String) (p : s.ValidPos) (res : Name)
(acc : String) (ucount : Nat) : Name :=
if h : p = s.endValidPos then res.str (acc.pushn '_' (ucount / 2)) else
let ch := p.get h
let p := p.next h
if ch = '_' then demangleAux s p res acc (ucount + 1) else
if ucount % 2 = 0 then
demangleAux s p res (acc.pushn '_' (ucount / 2) |>.push ch) 0
else if ch.isDigit then
let res := res.str (acc.pushn '_' (ucount / 2))
if h : ch = '0' ∧ ∃ h : p ≠ s.endValidPos, p.get h = '0' then
demangleAux s (p.next h.2.1) res "" 0
else
decodeNum s p res (ch.val - 48).toNat
else if h : ch = 'x' ∧ checkLowerHex 2 s p then
let acc := acc.pushn '_' (ucount / 2)
demangleAux s ⟨_, valid_of_checkLowerHex h.2⟩ res (acc.push (Char.ofNat (parseLowerHex 2 s p h.2 0))) 0
else if h : ch = 'u' ∧ checkLowerHex 4 s p then
let acc := acc.pushn '_' (ucount / 2)
demangleAux s ⟨_, valid_of_checkLowerHex h.2⟩ res (acc.push (Char.ofNat (parseLowerHex 4 s p h.2 0))) 0
else if h : ch = 'U' ∧ checkLowerHex 8 s p then
let acc := acc.pushn '_' (ucount / 2)
demangleAux s ⟨_, valid_of_checkLowerHex h.2⟩ res (acc.push (Char.ofNat (parseLowerHex 8 s p h.2 0))) 0
else
demangleAux s p (res.str acc) ("".pushn '_' (ucount / 2) |>.push ch) 0
termination_by p.remainingBytes
decreasing_by
· apply String.ValidPos.remainingBytes_next_lt
· apply String.ValidPos.remainingBytes_next_lt
· apply String.ValidPos.remainingBytes_next_lt_of_lt
(Nat.lt_trans (String.ValidPos.lt_next _) (String.ValidPos.lt_next _))
· apply String.ValidPos.remainingBytes_next_lt
· apply String.ValidPos.remainingBytes_next_lt_of_lt
(Nat.lt_trans (String.ValidPos.lt_next _) (Nat.lt_add_of_pos_right (by decide)))
· apply String.ValidPos.remainingBytes_next_lt_of_lt
(Nat.lt_trans (String.ValidPos.lt_next _) (Nat.lt_add_of_pos_right (by decide)))
· apply String.ValidPos.remainingBytes_next_lt_of_lt
(Nat.lt_trans (String.ValidPos.lt_next _) (Nat.lt_add_of_pos_right (by decide)))
· apply String.ValidPos.remainingBytes_next_lt
where
decodeNum (s : String) (p : s.ValidPos) (res : Name) (n : Nat) : Name :=
if h : p = s.endValidPos then res.num n else
let ch := p.get h
let p := p.next h
if ch.isDigit then
decodeNum s p res (n * 10 + (ch.val - 48).toNat)
else -- assume ch = '_'
let res := res.num n
if h : p = s.endValidPos then res else
nameStart s (p.next h) res -- assume s.get' p h = '_'
termination_by p.remainingBytes
decreasing_by
· apply String.ValidPos.remainingBytes_next_lt
· apply String.ValidPos.remainingBytes_next_lt_of_lt
(Nat.lt_trans (String.ValidPos.lt_next _) (String.ValidPos.lt_next _))
nameStart (s : String) (p : s.ValidPos) (res : Name) : Name :=
if h : p = s.endValidPos then res else
let ch := p.get h
let p := p.next h
if ch.isDigit then
if h : ch = '0' ∧ ∃ h : p ≠ s.endValidPos, p.get h = '0' then
demangleAux s (p.next h.2.1) res "" 0
else
decodeNum s p res (ch.val - 48).toNat
else if ch = '_' then
demangleAux s p res "" 1
else
demangleAux s p res (String.singleton ch) 0
termination_by p.remainingBytes
decreasing_by
· apply String.ValidPos.remainingBytes_next_lt_of_lt
(Nat.lt_trans (String.ValidPos.lt_next _) (String.ValidPos.lt_next _))
all_goals apply String.ValidPos.remainingBytes_next_lt
/-- Assuming `s` has been produced by `Name.mangle _ ""`, return the original name. -/
public def Name.demangle (s : String) : Name :=
demangleAux.nameStart s s.startValidPos .anonymous
/--
Returns the demangled version of `s`, if it's the result of `Name.mangle _ ""`. Otherwise returns
`none`.
-/
public def Name.demangle? (s : String) : Option Name :=
let n := demangle s
if mangleAux n = s then some n else none
-- For correctness of mangle/demangle, see https://gist.github.com/Rob23oba/5ddef42a1743858e9334461ca57c4be8
end Lean

View file

@ -1,5 +1,6 @@
#include "util/options.h"
// stage0 update, please
namespace lean {
options get_default_options() {
options opts;

View file

@ -1,4 +1,5 @@
import Lean
def main : IO Unit := pure () #exit -- TODO: remove after stage0 update
open Lean

View file

@ -7,6 +7,7 @@ import Lean.Data.AssocList
import Std.Data.HashMap
import Std.Data.Iterators.Producers.Range
import Std.Data.Iterators.Combinators.StepSize
def main : IO Unit := pure () #exit -- TODO: remove after stage0 update
open Lean

View file

@ -1,2 +0,0 @@
SAT
-104 -253 65 75 -271 269 234 -125 59 -291 23 -150 -127 -232 11 -66 -199 133 -51 -120 -141 276 24 6 -85 28 240 54 -182 -160 128 14 -212 269 -154 28 134 -125 -49 192 14 -130 69 -53 -157 264 103 48 -271 184

View file

@ -1,3 +1,5 @@
def main : IO Unit := pure () /- -- TODO: remove after stage0 update
def main : IO Unit := do
IO.eprintln $ "\rfailed at counter-example"
IO.eprintln $ "\tfailed at counter-example"
-/

View file

@ -1,2 +0,0 @@
failed at counter-example
failed at counter-example

View file

@ -1,4 +1,5 @@
import Lean
def main : IO Unit := pure () /- -- TODO: remove after stage0 update
open Lean
@ -8,3 +9,4 @@ IO.println e
IO.println s!"hash: {e.hash}"
IO.println e.getAppArgs
pure 0
-/

View file

@ -1,3 +0,0 @@
f a b
hash: 2008687407
#[a, b]

View file

@ -1,3 +1,4 @@
def main : IO Unit := pure () /- -- TODO: remove after stage0 update
def showChars : Nat → String → String.Pos.Raw → IO Unit
| 0, _, _ => pure ()
| n+1, s, idx => do
@ -27,3 +28,4 @@ IO.println ("ab".isPrefixOf "a") *>
IO.println ("αb".isPrefixOf "αbc") *>
IO.println ("\x00a").length *>
pure 0
-/

View file

@ -1,31 +0,0 @@
hello α_world_β
llo α_world_β
llo α_world_β
llo α_world_
_world_β
17
"aaa"
>> h
>> e
>> l
>> l
>> o
>>
>> α
>> _
>> w
>> o
>> r
>> l
>> d
>> _
>> β
true
true
true
true
false
false
true
2

View file

@ -1,6 +0,0 @@
import Lean.Compiler.NameMangling
#eval "ab12".mangle
#eval "ÿ".mangle
#eval "α₁".mangle
#eval "𝒫".mangle

View file

@ -0,0 +1,89 @@
module
import Lean.Compiler.NameMangling
/-!
# Test behavior of name mangling
-/
def checkMangle (n : Lean.Name) (s : String) : IO Unit := do
if n.mangle "" ≠ s then
throw <| .userError s!"failed: {n} mangles to {n.mangle ""} but expected {s}"
if .demangle s ≠ n then
throw <| .userError s!"failed: {s} demangles to {Lean.Name.demangle s} but expected {n}"
/-!
Mangling simple identifiers with optional number components and preceding underscores.
-/
#eval checkMangle `ab12 "ab12"
#eval checkMangle ``Lean.Name.mangle "Lean_Name_mangle"
#eval checkMangle `Lean.Name.mangle._aux "Lean_Name_mangle___aux"
#eval checkMangle ((`_private.Lean.Compiler.NameMangling).num 0 ++ `Lean.Name.mangleAux)
"__private_Lean_Compiler_NameMangling_0__Lean_Name_mangleAux"
/-!
Escape sequences in mangled identifiers.
-/
#eval checkMangle `«ÿ» "00_xff"
#eval checkMangle `«α₁» "00_u03b1_u2081"
#eval checkMangle `«𝒫» "00_U0001d4ab"
/-!
Escape sequence disambiguation
-/
#eval checkMangle `a' "a_x27"
#eval checkMangle `a.x27 "a_00x27"
#eval checkMangle `a_' "a___x27"
#eval checkMangle `a._x27 "a_00__x27"
#eval checkMangle `a.u0027 "a_00u0027"
#eval checkMangle `a.U00000027 "a_00U00000027"
#eval checkMangle `a.ucafe "a_00ucafe"
#eval checkMangle `a.uCAFE "a_uCAFE" -- uppercase does not need to be disambiguated
/-!
Trailing underscores in names
-/
#eval checkMangle `a._b "a___b"
#eval checkMangle `a_.b "a___00b"
#eval checkMangle `a_ "a__"
#eval checkMangle `a_.«» "a___00"
#eval checkMangle `a.__ "a_00____"
/-!
Empty name components
-/
#eval checkMangle `a_b "a__b"
#eval checkMangle `a.«».b "a_00_b"
#eval checkMangle `«».b "00_b"
#eval checkMangle `b_.«» "b___00"
/-!
Numbers vs numbers in text
-/
#eval checkMangle ((`a).num 2 ++ `b) "a_2__b"
#eval checkMangle `a.«2_b» "a_002__b"
/-!
Consecutive number components
-/
#eval checkMangle ((`a).num 2 |>.num 3 |>.str "" |>.num 4) "a_2__3__00_4_"
/-!
Preceding number components
-/
#eval checkMangle (.str (.num .anonymous 4) "hi") "4__hi"
/-!
Anonymous vs empty string
-/
#eval checkMangle .anonymous ""
#eval checkMangle `«» "00"
#eval checkMangle `«».«» "00_00"

View file

@ -1,5 +1,7 @@
#!/usr/bin/env bash
exit 0 # TODO: remove after stage0 update
rm -rf .lake/build
lake build

View file

@ -1,4 +1,6 @@
#!/usr/bin/env bash
exit 0 # TODO: remove after stage0 update
rm -rf .lake/build
lake exe user_attr

View file

@ -1,4 +1,5 @@
#!/usr/bin/env bash
exit 0 # TODO: remove after stage0 update
source ../common.sh
# LEAN_EXPORTING needs to be defined for .c files included in shared libraries