lean4-htt/script/profiler/lean_demangle.py
Leonardo de Moura 91bd6e19a7
feat: add Lean name demangler and profiling pipeline (#12517)
This PR adds tooling for profiling Lean programs with human-readable
function names in Firefox Profiler:

- **`script/lean_profile.sh`** — One-command pipeline: record with
samply, symbolicate, demangle, and open in Firefox Profiler
- **`script/profiler/lean_demangle.py`** — Faithful port of
`Name.demangleAux` from `NameMangling.lean`, with a postprocessor that
folds compiler suffixes into compact annotations (`[λ, arity↓]`, `spec
at context[flags]`)
- **`script/profiler/symbolicate_profile.py`** — Resolves raw addresses
via samply's symbolication API
- **`script/profiler/serve_profile.py`** — Serves demangled profiles to
Firefox Profiler without re-symbolication
- **`PROFILER_README.md`** — Documentation including a guide to reading
demangled names

### Example output in Firefox Profiler

| Raw C symbol | Demangled |
|---|---|
| `l_Lean_Meta_Sym_main` | `Lean.Meta.Sym.main` |
| `l_Lean_Meta_foo___redArg___lam__0` | `Lean.Meta.foo [λ, arity↓]` |
| `l_Lean_MVarId_withContext___at__...___spec__2___boxed` |
`Lean.MVarId.withContext [boxed] spec at Lean.Meta.bar[λ, arity↓]` |

Example:

<img width="1145" height="570" alt="image"
src="https://github.com/user-attachments/assets/8d23cc6a-1b89-4c60-9f4a-9f9f0f6e7697"
/>


🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 03:27:58 +00:00

779 lines
24 KiB
Python

#!/usr/bin/env python3
"""
Lean name demangler.
Demangles C symbol names produced by the Lean 4 compiler back into
readable Lean hierarchical names.
Usage as a filter (like c++filt):
echo "l_Lean_Meta_Sym_main" | python lean_demangle.py
Usage as a module:
from lean_demangle import demangle_lean_name
print(demangle_lean_name("l_Lean_Meta_Sym_main"))
"""
import sys
# ---------------------------------------------------------------------------
# String.mangle / unmangle
# ---------------------------------------------------------------------------
def _is_ascii_alnum(ch):
"""Check if ch is an ASCII letter or digit (matching Lean's isAlpha/isDigit)."""
return ('a' <= ch <= 'z') or ('A' <= ch <= 'Z') or ('0' <= ch <= '9')
def mangle_string(s):
"""Port of Lean's String.mangle: escape a single string for C identifiers."""
result = []
for ch in s:
if _is_ascii_alnum(ch):
result.append(ch)
elif ch == '_':
result.append('__')
else:
code = ord(ch)
if code < 0x100:
result.append('_x' + format(code, '02x'))
elif code < 0x10000:
result.append('_u' + format(code, '04x'))
else:
result.append('_U' + format(code, '08x'))
return ''.join(result)
def _parse_hex(s, pos, n):
"""Parse n lowercase hex digits at pos. Returns (new_pos, value) or None."""
if pos + n > len(s):
return None
val = 0
for i in range(n):
c = s[pos + i]
if '0' <= c <= '9':
val = (val << 4) | (ord(c) - ord('0'))
elif 'a' <= c <= 'f':
val = (val << 4) | (ord(c) - ord('a') + 10)
else:
return None
return (pos + n, val)
# ---------------------------------------------------------------------------
# Name mangling (for round-trip verification)
# ---------------------------------------------------------------------------
def _check_disambiguation(m):
"""Port of Lean's checkDisambiguation: does mangled string m need a '00' prefix?"""
pos = 0
while pos < len(m):
ch = m[pos]
if ch == '_':
pos += 1
continue
if ch == 'x':
return _parse_hex(m, pos + 1, 2) is not None
if ch == 'u':
return _parse_hex(m, pos + 1, 4) is not None
if ch == 'U':
return _parse_hex(m, pos + 1, 8) is not None
if '0' <= ch <= '9':
return True
return False
# all underscores or empty
return True
def _need_disambiguation(prev_component, mangled_next):
"""Port of Lean's needDisambiguation."""
# Check if previous component (as a string) ends with '_'
prev_ends_underscore = (isinstance(prev_component, str) and
len(prev_component) > 0 and
prev_component[-1] == '_')
return prev_ends_underscore or _check_disambiguation(mangled_next)
def mangle_name(components, prefix="l_"):
"""
Mangle a list of name components (str or int) into a C symbol.
Port of Lean's Name.mangle.
"""
if not components:
return prefix
parts = []
prev = None
for i, comp in enumerate(components):
if isinstance(comp, int):
if i == 0:
parts.append(str(comp) + '_')
else:
parts.append('_' + str(comp) + '_')
else:
m = mangle_string(comp)
if i == 0:
if _check_disambiguation(m):
parts.append('00' + m)
else:
parts.append(m)
else:
if _need_disambiguation(prev, m):
parts.append('_00' + m)
else:
parts.append('_' + m)
prev = comp
return prefix + ''.join(parts)
# ---------------------------------------------------------------------------
# Name demangling
# ---------------------------------------------------------------------------
def demangle_body(s):
"""
Demangle a string produced by Name.mangleAux (without prefix).
Returns a list of components (str or int).
This is a faithful port of Lean's Name.demangleAux from NameMangling.lean.
"""
components = []
length = len(s)
def emit(comp):
components.append(comp)
def decode_num(pos, n):
"""Parse remaining digits, emit numeric component, continue."""
while pos < length:
ch = s[pos]
if '0' <= ch <= '9':
n = n * 10 + (ord(ch) - ord('0'))
pos += 1
else:
# Expect '_' (trailing underscore of numeric encoding)
pos += 1 # skip '_'
emit(n)
if pos >= length:
return pos
# Skip separator '_' and go to name_start
pos += 1
return name_start(pos)
# End of string
emit(n)
return pos
def name_start(pos):
"""Start parsing a new name component."""
if pos >= length:
return pos
ch = s[pos]
pos += 1
if '0' <= ch <= '9':
# Check for '00' disambiguation
if ch == '0' and pos < length and s[pos] == '0':
pos += 1
return demangle_main(pos, "", 0)
else:
return decode_num(pos, ord(ch) - ord('0'))
elif ch == '_':
return demangle_main(pos, "", 1)
else:
return demangle_main(pos, ch, 0)
def demangle_main(pos, acc, ucount):
"""Main demangling loop."""
while pos < length:
ch = s[pos]
pos += 1
if ch == '_':
ucount += 1
continue
if ucount % 2 == 0:
# Even underscores: literal underscores in component name
acc += '_' * (ucount // 2) + ch
ucount = 0
continue
# Odd ucount: separator or escape
if '0' <= ch <= '9':
# End current str component, start number
emit(acc + '_' * (ucount // 2))
if ch == '0' and pos < length and s[pos] == '0':
pos += 1
return demangle_main(pos, "", 0)
else:
return decode_num(pos, ord(ch) - ord('0'))
# Try hex escapes
if ch == 'x':
result = _parse_hex(s, pos, 2)
if result is not None:
new_pos, val = result
acc += '_' * (ucount // 2) + chr(val)
pos = new_pos
ucount = 0
continue
if ch == 'u':
result = _parse_hex(s, pos, 4)
if result is not None:
new_pos, val = result
acc += '_' * (ucount // 2) + chr(val)
pos = new_pos
ucount = 0
continue
if ch == 'U':
result = _parse_hex(s, pos, 8)
if result is not None:
new_pos, val = result
acc += '_' * (ucount // 2) + chr(val)
pos = new_pos
ucount = 0
continue
# Name separator
emit(acc)
acc = '_' * (ucount // 2) + ch
ucount = 0
# End of string
acc += '_' * (ucount // 2)
if acc:
emit(acc)
return pos
name_start(0)
return components
# ---------------------------------------------------------------------------
# Prefix handling for lp_ (package prefix)
# ---------------------------------------------------------------------------
def _is_valid_string_mangle(s):
"""Check if s is a valid output of String.mangle (no trailing bare _)."""
pos = 0
length = len(s)
while pos < length:
ch = s[pos]
if _is_ascii_alnum(ch):
pos += 1
elif ch == '_':
if pos + 1 >= length:
return False # trailing bare _
nch = s[pos + 1]
if nch == '_':
pos += 2
elif nch == 'x' and _parse_hex(s, pos + 2, 2) is not None:
pos = _parse_hex(s, pos + 2, 2)[0]
elif nch == 'u' and _parse_hex(s, pos + 2, 4) is not None:
pos = _parse_hex(s, pos + 2, 4)[0]
elif nch == 'U' and _parse_hex(s, pos + 2, 8) is not None:
pos = _parse_hex(s, pos + 2, 8)[0]
else:
return False
else:
return False
return True
def _skip_string_mangle(s, pos):
"""
Skip past a String.mangle output in s starting at pos.
Returns the position after the mangled string (where we expect the separator '_').
This is a greedy scan.
"""
length = len(s)
while pos < length:
ch = s[pos]
if _is_ascii_alnum(ch):
pos += 1
elif ch == '_':
if pos + 1 < length:
nch = s[pos + 1]
if nch == '_':
pos += 2
elif nch == 'x' and _parse_hex(s, pos + 2, 2) is not None:
pos = _parse_hex(s, pos + 2, 2)[0]
elif nch == 'u' and _parse_hex(s, pos + 2, 4) is not None:
pos = _parse_hex(s, pos + 2, 4)[0]
elif nch == 'U' and _parse_hex(s, pos + 2, 8) is not None:
pos = _parse_hex(s, pos + 2, 8)[0]
else:
return pos # bare '_': separator
else:
return pos
else:
return pos
return pos
def _find_lp_body(s):
"""
Given s = everything after 'lp_' in a symbol, find where the declaration
body (Name.mangleAux output) starts.
Returns the start index of the body within s, or None.
Strategy: try all candidate split points where the package part is a valid
String.mangle output and the body round-trips. Prefer the longest valid
package name (most specific match).
"""
length = len(s)
# Collect candidate split positions: every '_' that could be the separator
candidates = []
pos = 0
while pos < length:
if s[pos] == '_':
candidates.append(pos)
pos += 1
# Try each candidate; collect all valid splits
valid_splits = []
for split_pos in candidates:
pkg_part = s[:split_pos]
if not pkg_part:
continue
if not _is_valid_string_mangle(pkg_part):
continue
body = s[split_pos + 1:]
if not body:
continue
components = demangle_body(body)
if not components:
continue
remangled = mangle_name(components, prefix="")
if remangled == body:
first = components[0]
# Score: prefer first component starting with uppercase
has_upper = isinstance(first, str) and first and first[0].isupper()
valid_splits.append((split_pos, has_upper))
if valid_splits:
# Among splits where first decl component starts uppercase, pick longest pkg.
# Otherwise pick shortest pkg.
upper_splits = [s for s in valid_splits if s[1]]
if upper_splits:
best = max(upper_splits, key=lambda x: x[0])
else:
best = min(valid_splits, key=lambda x: x[0])
return best[0] + 1
# Fallback: greedy String.mangle scan
greedy_pos = _skip_string_mangle(s, 0)
if greedy_pos < length and s[greedy_pos] == '_':
return greedy_pos + 1
return None
# ---------------------------------------------------------------------------
# Format name components for display
# ---------------------------------------------------------------------------
def format_name(components):
"""Format a list of name components as a dot-separated string."""
return '.'.join(str(c) for c in components)
# ---------------------------------------------------------------------------
# Human-friendly postprocessing
# ---------------------------------------------------------------------------
# Compiler-generated suffix components — exact match
_SUFFIX_FLAGS_EXACT = {
'_redArg': 'arity\u2193',
'_boxed': 'boxed',
'_impl': 'impl',
}
# Compiler-generated suffix prefixes — match with optional _N index
# e.g., _lam, _lam_0, _lam_3, _lambda_0, _closed_2
_SUFFIX_FLAGS_PREFIX = {
'_lam': '\u03bb',
'_lambda': '\u03bb',
'_elam': '\u03bb',
'_jp': 'jp',
'_closed': 'closed',
}
def _match_suffix(component):
"""
Check if a string component is a compiler-generated suffix.
Returns the flag label or None.
Handles both exact matches (_redArg, _boxed) and indexed suffixes
(_lam_0, _lambda_2, _closed_0) produced by appendIndexAfter.
"""
if not isinstance(component, str):
return None
if component in _SUFFIX_FLAGS_EXACT:
return _SUFFIX_FLAGS_EXACT[component]
if component in _SUFFIX_FLAGS_PREFIX:
return _SUFFIX_FLAGS_PREFIX[component]
# Check for indexed suffix: prefix + _N
for prefix, label in _SUFFIX_FLAGS_PREFIX.items():
if component.startswith(prefix + '_'):
rest = component[len(prefix) + 1:]
if rest.isdigit():
return label
return None
def _strip_private(components):
"""Strip _private.Module.0. prefix. Returns (stripped_parts, is_private)."""
if (len(components) >= 3 and isinstance(components[0], str) and
components[0] == '_private'):
for i in range(1, len(components)):
if components[i] == 0:
if i + 1 < len(components):
return components[i + 1:], True
break
return components, False
def _strip_spec_suffixes(components):
"""Strip trailing spec_N components (from appendIndexAfter)."""
parts = list(components)
while parts and isinstance(parts[-1], str) and parts[-1].startswith('spec_'):
rest = parts[-1][5:]
if rest.isdigit():
parts.pop()
else:
break
return parts
def _is_spec_index(component):
"""Check if a component is a spec_N index (from appendIndexAfter)."""
return (isinstance(component, str) and
component.startswith('spec_') and component[5:].isdigit())
def _parse_spec_entries(rest):
"""Parse _at_..._spec pairs into separate spec context entries.
Given components starting from the first _at_, returns:
- entries: list of component lists, one per _at_..._spec block
- remaining: components after the last _spec N (trailing suffixes)
"""
entries = []
current_ctx = None
remaining = []
skip_next = False
for p in rest:
if skip_next:
skip_next = False
continue
if isinstance(p, str) and p == '_at_':
if current_ctx is not None:
entries.append(current_ctx)
current_ctx = []
continue
if isinstance(p, str) and p == '_spec':
if current_ctx is not None:
entries.append(current_ctx)
current_ctx = None
skip_next = True
continue
if isinstance(p, str) and p.startswith('_spec'):
if current_ctx is not None:
entries.append(current_ctx)
current_ctx = None
continue
if current_ctx is not None:
current_ctx.append(p)
else:
remaining.append(p)
if current_ctx is not None:
entries.append(current_ctx)
return entries, remaining
def _process_spec_context(components):
"""Process a spec context into a clean name and its flags.
Returns (name_parts, flags) where name_parts are the cleaned components
and flags is a deduplicated list of flag labels from compiler suffixes.
"""
parts = list(components)
parts, _ = _strip_private(parts)
name_parts = []
ctx_flags = []
seen = set()
for p in parts:
flag = _match_suffix(p)
if flag is not None:
if flag not in seen:
ctx_flags.append(flag)
seen.add(flag)
elif _is_spec_index(p):
pass
else:
name_parts.append(p)
return name_parts, ctx_flags
def postprocess_name(components):
"""
Transform raw demangled components into a human-friendly display string.
Applies:
- Private name cleanup: _private.Module.0.Name.foo -> Name.foo [private]
- Hygienic name cleanup: strips _@.module._hygCtx._hyg.N
- Suffix folding: _redArg, _boxed, _lam_0, etc. -> [flags]
- Specialization: f._at_.g._spec.N -> f spec at g
Shown after base [flags], with context flags: spec at g[ctx_flags]
"""
if not components:
return ""
parts = list(components)
flags = []
spec_entries = []
# --- Strip _private prefix ---
parts, is_private = _strip_private(parts)
# --- Strip hygienic suffixes: everything from _@ onward ---
at_idx = None
for i, p in enumerate(parts):
if isinstance(p, str) and p.startswith('_@'):
at_idx = i
break
if at_idx is not None:
parts = parts[:at_idx]
# --- Handle specialization: _at_ ... _spec N ---
at_positions = [i for i, p in enumerate(parts)
if isinstance(p, str) and p == '_at_']
if at_positions:
first_at = at_positions[0]
base = parts[:first_at]
rest = parts[first_at:]
entries, remaining = _parse_spec_entries(rest)
for ctx_components in entries:
ctx_name, ctx_flags = _process_spec_context(ctx_components)
if ctx_name or ctx_flags:
spec_entries.append((ctx_name, ctx_flags))
parts = base + remaining
# --- Collect suffix flags from the end ---
while parts:
last = parts[-1]
flag = _match_suffix(last)
if flag is not None:
flags.append(flag)
parts.pop()
elif isinstance(last, int) and len(parts) >= 2:
prev_flag = _match_suffix(parts[-2])
if prev_flag is not None:
flags.append(prev_flag)
parts.pop() # remove the number
parts.pop() # remove the suffix
else:
break
else:
break
if is_private:
flags.append('private')
# --- Format result ---
name = '.'.join(str(c) for c in parts) if parts else '?'
result = name
if flags:
flag_str = ', '.join(flags)
result += f' [{flag_str}]'
for ctx_name, ctx_flags in spec_entries:
ctx_str = '.'.join(str(c) for c in ctx_name) if ctx_name else '?'
if ctx_flags:
ctx_flag_str = ', '.join(ctx_flags)
result += f' spec at {ctx_str}[{ctx_flag_str}]'
else:
result += f' spec at {ctx_str}'
return result
# ---------------------------------------------------------------------------
# Main demangling entry point
# ---------------------------------------------------------------------------
def demangle_lean_name_raw(mangled):
"""
Demangle a Lean C symbol, preserving all internal name components.
Returns the exact demangled name with all compiler-generated suffixes
intact. Use demangle_lean_name() for human-friendly output.
"""
try:
return _demangle_lean_name_inner(mangled, human_friendly=False)
except Exception:
return mangled
def demangle_lean_name(mangled):
"""
Demangle a C symbol name produced by the Lean 4 compiler.
Returns a human-friendly demangled name with compiler suffixes folded
into readable flags. Use demangle_lean_name_raw() to preserve all
internal components.
"""
try:
return _demangle_lean_name_inner(mangled, human_friendly=True)
except Exception:
return mangled
def _demangle_lean_name_inner(mangled, human_friendly=True):
"""Inner demangle that may raise on malformed input."""
if mangled == "_lean_main":
return "[lean] main"
# Handle lean_ runtime functions
if human_friendly and mangled.startswith("lean_apply_"):
rest = mangled[11:]
if rest.isdigit():
return f"<apply/{rest}>"
# Strip .cold.N suffix (LLVM linker cold function clones)
cold_suffix = ""
core = mangled
dot_pos = core.find('.cold.')
if dot_pos >= 0:
cold_suffix = " " + core[dot_pos:]
core = core[:dot_pos]
elif core.endswith('.cold'):
cold_suffix = " .cold"
core = core[:-5]
result = _demangle_core(core, human_friendly)
if result is None:
return mangled
return result + cold_suffix
def _demangle_core(mangled, human_friendly=True):
"""Demangle a symbol without .cold suffix. Returns None if not a Lean name."""
fmt = postprocess_name if human_friendly else format_name
# _init_ prefix
if mangled.startswith("_init_"):
rest = mangled[6:]
body, pkg_display = _strip_lean_prefix(rest)
if body is None:
return None
components = demangle_body(body)
if not components:
return None
name = fmt(components)
if pkg_display:
return f"[init] {name} ({pkg_display})"
return f"[init] {name}"
# initialize_ prefix (module init functions)
if mangled.startswith("initialize_"):
rest = mangled[11:]
# With package: initialize_lp_{pkg}_{body} or initialize_l_{body}
body, pkg_display = _strip_lean_prefix(rest)
if body is not None:
components = demangle_body(body)
if components:
name = fmt(components)
if pkg_display:
return f"[module_init] {name} ({pkg_display})"
return f"[module_init] {name}"
# Without package: initialize_{Name.mangleAux(moduleName)}
if rest:
components = demangle_body(rest)
if components:
return f"[module_init] {fmt(components)}"
return None
# l_ or lp_ prefix
body, pkg_display = _strip_lean_prefix(mangled)
if body is None:
return None
components = demangle_body(body)
if not components:
return None
name = fmt(components)
if pkg_display:
return f"{name} ({pkg_display})"
return name
def _strip_lean_prefix(s):
"""
Strip the l_ or lp_ prefix from a mangled symbol.
Returns (body, pkg_display) where body is the Name.mangleAux output
and pkg_display is None or a string describing the package.
Returns (None, None) if the string doesn't have a recognized prefix.
"""
if s.startswith("l_"):
return (s[2:], None)
if s.startswith("lp_"):
after_lp = s[3:]
body_start = _find_lp_body(after_lp)
if body_start is not None:
pkg_mangled = after_lp[:body_start - 1]
# Unmangle the package name
pkg_components = demangle_body(pkg_mangled)
if pkg_components and len(pkg_components) == 1 and isinstance(pkg_components[0], str):
pkg_display = pkg_components[0]
else:
pkg_display = pkg_mangled
return (after_lp[body_start:], pkg_display)
# Fallback: treat everything after lp_ as body
return (after_lp, "?")
return (None, None)
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main():
"""Filter stdin or arguments, demangling Lean names."""
import argparse
parser = argparse.ArgumentParser(
description="Demangle Lean 4 C symbol names (like c++filt for Lean)")
parser.add_argument('names', nargs='*',
help='Names to demangle (reads stdin if none given)')
parser.add_argument('--raw', action='store_true',
help='Output exact demangled names without postprocessing')
args = parser.parse_args()
demangle = demangle_lean_name_raw if args.raw else demangle_lean_name
if args.names:
for name in args.names:
print(demangle(name))
else:
for line in sys.stdin:
print(demangle(line.rstrip('\n')))
if __name__ == '__main__':
main()