From f9e140838e5fb12f0726de36bcdf451eb08559fe Mon Sep 17 00:00:00 2001 From: Leonardo de Moura Date: Wed, 8 Oct 2025 14:12:03 -0700 Subject: [PATCH] feat: `hexnum` parser (#10716) This PR adds a new helper parser for implementing parsers that contain hexadecimal numbers. We are going to use it to implement anchors in the `grind` interactive mode. --- src/Init/Meta/Defs.lean | 19 +++++++++++++++ src/Init/Prelude.lean | 10 +++++++- src/Lean/Meta/Tactic/Grind/Anchor.lean | 7 ++++++ src/Lean/Parser.lean | 1 + src/Lean/Parser/Basic.lean | 15 +++++++++--- src/Lean/Parser/Extra.lean | 11 +++++++++ src/Lean/PrettyPrinter/Formatter.lean | 3 +++ src/Lean/PrettyPrinter/Parenthesizer.lean | 2 ++ stage0/src/stdlib_flags.h | 1 + tests/lean/interactive/1265.lean.expected.out | 16 +++++++++++++ tests/lean/run/hexnum.lean | 24 +++++++++++++++++++ 11 files changed, 105 insertions(+), 4 deletions(-) create mode 100644 tests/lean/run/hexnum.lean diff --git a/src/Init/Meta/Defs.lean b/src/Init/Meta/Defs.lean index 59470a5246..f0f216a843 100644 --- a/src/Init/Meta/Defs.lean +++ b/src/Init/Meta/Defs.lean @@ -443,6 +443,10 @@ abbrev NumLit := TSyntax numLitKind Syntax that represents macro hygiene info. -/ abbrev HygieneInfo := TSyntax hygieneInfoKind +/-- +Syntax that represent a hexadecimal number without the `0x` prefix. +-/ +abbrev HexNum := TSyntax hexnumKind end Syntax @@ -1196,6 +1200,21 @@ Returns `0` if the syntax is malformed. def getNat (s : NumLit) : Nat := s.raw.isNatLit?.getD 0 +private def isHexNum? (stx : Syntax) : Option Nat := + match Syntax.isLit? hexnumKind stx with + | some val => Syntax.decodeHexLitAux val 0 0 + | _ => none + +/-- Returns the value of the hexadecimal numeral as a natural number. -/ +def getHexNumVal (s : Syntax.HexNum) : Nat := + isHexNum? s.raw |>.getD 0 + +/-- Returns the number of hexadecimal digits. -/ +def getHexNumSize (s : Syntax.HexNum) : Nat := + match Syntax.isLit? hexnumKind s.raw with + | some val => val.utf8ByteSize + | _ => 0 + /-- Extracts the parsed name from the syntax of an identifier. diff --git a/src/Init/Prelude.lean b/src/Init/Prelude.lean index 34e83054e5..0098fb5b55 100644 --- a/src/Init/Prelude.lean +++ b/src/Init/Prelude.lean @@ -4969,9 +4969,17 @@ abbrev strLitKind : SyntaxNodeKind := `str /-- `` `char `` is the node kind of character literals like `'A'`. -/ abbrev charLitKind : SyntaxNodeKind := `char -/-- `` `num `` is the node kind of number literals like `42`. -/ +/-- `` `num `` is the node kind of number literals like `42` and `0xa1` -/ abbrev numLitKind : SyntaxNodeKind := `num +/-- +`` `hexnum `` is the node kind of hexadecimal numbers like `ea10` +without the `0x` prefix. Recall that `hexnum` is not a token and must be prefixed. +For hexadecimal number literals, you should use `num` instead. +Example: `syntax anchor := "#" noWs hexnum`. + -/ +abbrev hexnumKind : SyntaxNodeKind := `hexnum + /-- `` `scientific `` is the node kind of floating point literals like `1.23e-3`. -/ abbrev scientificLitKind : SyntaxNodeKind := `scientific diff --git a/src/Lean/Meta/Tactic/Grind/Anchor.lean b/src/Lean/Meta/Tactic/Grind/Anchor.lean index b27d34270a..8ba97448d4 100644 --- a/src/Lean/Meta/Tactic/Grind/Anchor.lean +++ b/src/Lean/Meta/Tactic/Grind/Anchor.lean @@ -80,4 +80,11 @@ public partial def getAnchor (e : Expr) : GrindM UInt64 := do modify fun s => { s with anchors := s.anchors.insert { expr := e } a } return a +/-- +Example: `isAnchorPrefix 4 0x0c88 0x0c88ab10ef20206a` returns `true` +-/ +public def isAnchorPrefix (numHexDigits : Nat) (anchorPrefix : UInt64) (anchor : UInt64) : Bool := + let shift := 64 - numHexDigits.toUInt64*4 + anchorPrefix == anchor >>> shift + end Lean.Meta.Grind diff --git a/src/Lean/Parser.lean b/src/Lean/Parser.lean index b1af9e676a..00aa0d9992 100644 --- a/src/Lean/Parser.lean +++ b/src/Lean/Parser.lean @@ -51,6 +51,7 @@ builtin_initialize register_parser_alias withoutPosition { stackSz? := none } register_parser_alias withoutForbidden { stackSz? := none } register_parser_alias (kind := interpolatedStrKind) interpolatedStr + register_parser_alias (kind := hexnumKind) hexnum register_parser_alias orelse register_parser_alias andthen { stackSz? := none } register_parser_alias recover diff --git a/src/Lean/Parser/Basic.lean b/src/Lean/Parser/Basic.lean index 799adf57f9..934f3a10f9 100644 --- a/src/Lean/Parser/Basic.lean +++ b/src/Lean/Parser/Basic.lean @@ -893,9 +893,9 @@ def octalNumberFn (startPos : String.Pos.Raw) (includeWhitespace := true) : Pars private def isHexDigit (c : Char) : Bool := ('0' ≤ c && c ≤ '9') || ('a' ≤ c && c ≤ 'f') || ('A' ≤ c && c ≤ 'F') -def hexNumberFn (startPos : String.Pos.Raw) (includeWhitespace := true) : ParserFn := fun c s => +def hexNumberFn (startPos : String.Pos.Raw) (includeWhitespace := true) (kind := numLitKind) : ParserFn := fun c s => let s := takeDigitsFn isHexDigit "hexadecimal number" true c s - mkNodeToken numLitKind startPos includeWhitespace c s + mkNodeToken kind startPos includeWhitespace c s def numberFnAux (includeWhitespace := true) : ParserFn := fun c s => let startPos := s.pos @@ -910,7 +910,7 @@ def numberFnAux (includeWhitespace := true) : ParserFn := fun c s => else if curr == 'o' || curr == 'O' then octalNumberFn startPos includeWhitespace c (s.next c i) else if curr == 'x' || curr == 'X' then - hexNumberFn startPos includeWhitespace c (s.next c i) + hexNumberFn startPos includeWhitespace numLitKind c (s.next c i) else decimalNumberFn startPos includeWhitespace c (s.setPos i) else if curr.isDigit then @@ -1258,6 +1258,15 @@ def numLitNoAntiquot : Parser := { info := mkAtomicInfo "num" } +/-- Parses an hexadecimal numeral without the `0x` prefix. It is not a literal. -/ +def hexnumFn : ParserFn := fun ctx s => + hexNumberFn s.pos true hexnumKind ctx s + +def hexnumNoAntiquot : Parser := { + fn := hexnumFn + info := mkAtomicInfo "hexnum" +} + def scientificLitFn : ParserFn := expectTokenFn scientificLitKind "scientific number" def scientificLitNoAntiquot : Parser := { diff --git a/src/Lean/Parser/Extra.lean b/src/Lean/Parser/Extra.lean index 2ac54d90e1..00dbbc2ac2 100644 --- a/src/Lean/Parser/Extra.lean +++ b/src/Lean/Parser/Extra.lean @@ -124,6 +124,17 @@ You can use `TSyntax.getNat` to extract the number from the resulting syntax obj @[run_builtin_parser_attribute_hooks, builtin_doc] def numLit : Parser := withAntiquot (mkAntiquot "num" numLitKind) numLitNoAntiquot +/-- The parser `hexnum` parses a hexadecimal numeric literal not containing the `0x` prefix. + +It produces a `hexnumKind` node containing an atom with the text of the +literal. This parser is mainly used for creating atoms such `#`. Recall that `hexnum` +is not a token and this parser must be prefixed by another parser. + +For numerals such as `0xadef100a`, you should use `numLit`. +-/ +@[builtin_doc] def hexnum : Parser := + withAntiquot (mkAntiquot "hexnum" hexnumKind) hexnumNoAntiquot + /-- The parser `scientific` parses a scientific-notation literal, such as `1.3e-24`. This parser has arity 1: it produces a `scientificLitKind` node containing an atom with the text diff --git a/src/Lean/PrettyPrinter/Formatter.lean b/src/Lean/PrettyPrinter/Formatter.lean index 347761af8c..09c254eae9 100644 --- a/src/Lean/PrettyPrinter/Formatter.lean +++ b/src/Lean/PrettyPrinter/Formatter.lean @@ -506,6 +506,7 @@ def visitAtom (k : SyntaxNodeKind) : Formatter := do @[combinator_formatter strLitNoAntiquot, expose] def strLitNoAntiquot.formatter := visitAtom strLitKind @[combinator_formatter nameLitNoAntiquot, expose] def nameLitNoAntiquot.formatter := visitAtom nameLitKind @[combinator_formatter numLitNoAntiquot, expose] def numLitNoAntiquot.formatter := visitAtom numLitKind + @[combinator_formatter scientificLitNoAntiquot, expose] def scientificLitNoAntiquot.formatter := visitAtom scientificLitKind @[combinator_formatter fieldIdx, expose] def fieldIdx.formatter := visitAtom fieldIdxKind @@ -570,6 +571,8 @@ def interpolatedStr.formatter (p : Formatter) : Formatter := do | some str => push str *> goLeft | none => p +@[combinator_formatter hexnumNoAntiquot, expose] def hexnum.formatter := visitAtom hexnumKind + @[combinator_formatter _root_.ite, expose, macro_inline] def ite {_ : Type} (c : Prop) [Decidable c] (t e : Formatter) : Formatter := if c then t else e diff --git a/src/Lean/PrettyPrinter/Parenthesizer.lean b/src/Lean/PrettyPrinter/Parenthesizer.lean index fb43ea55a5..15f266aa21 100644 --- a/src/Lean/PrettyPrinter/Parenthesizer.lean +++ b/src/Lean/PrettyPrinter/Parenthesizer.lean @@ -563,6 +563,8 @@ def interpolatedStr.parenthesizer (p : Parenthesizer) : Parenthesizer := do else p +@[combinator_parenthesizer hexnumNoAntiquot, expose] def hexnum.parenthesizer := visitToken + @[combinator_parenthesizer _root_.ite, expose, macro_inline] def ite {_ : Type} (c : Prop) [Decidable c] (t e : Parenthesizer) : Parenthesizer := if c then t else e diff --git a/stage0/src/stdlib_flags.h b/stage0/src/stdlib_flags.h index 79a0e58edd..ad491b0de1 100644 --- a/stage0/src/stdlib_flags.h +++ b/stage0/src/stdlib_flags.h @@ -1,3 +1,4 @@ +// update me! #include "util/options.h" namespace lean { diff --git a/tests/lean/interactive/1265.lean.expected.out b/tests/lean/interactive/1265.lean.expected.out index aa0bedc5aa..6ab639cce4 100644 --- a/tests/lean/interactive/1265.lean.expected.out +++ b/tests/lean/interactive/1265.lean.expected.out @@ -15,12 +15,20 @@ {"label": "getName", "kind": 3, "data": ["«external:file:///1265.lean»", 0, 51, 1, "cLean.TSyntax.getName"]}, + {"label": "getHexNumVal", + "kind": 3, + "data": + ["«external:file:///1265.lean»", 0, 51, 1, "cLean.TSyntax.getHexNumVal"]}, {"label": "getChar", "kind": 3, "data": ["«external:file:///1265.lean»", 0, 51, 1, "cLean.TSyntax.getChar"]}, {"label": "raw", "kind": 5, "data": ["«external:file:///1265.lean»", 0, 51, 1, "cLean.TSyntax.raw"]}, + {"label": "getHexNumSize", + "kind": 3, + "data": + ["«external:file:///1265.lean»", 0, 51, 1, "cLean.TSyntax.getHexNumSize"]}, {"label": "getDocString", "kind": 3, "data": @@ -58,12 +66,20 @@ {"label": "getName", "kind": 3, "data": ["«external:file:///1265.lean»", 2, 53, 1, "cLean.TSyntax.getName"]}, + {"label": "getHexNumVal", + "kind": 3, + "data": + ["«external:file:///1265.lean»", 2, 53, 1, "cLean.TSyntax.getHexNumVal"]}, {"label": "getChar", "kind": 3, "data": ["«external:file:///1265.lean»", 2, 53, 1, "cLean.TSyntax.getChar"]}, {"label": "raw", "kind": 5, "data": ["«external:file:///1265.lean»", 2, 53, 1, "cLean.TSyntax.raw"]}, + {"label": "getHexNumSize", + "kind": 3, + "data": + ["«external:file:///1265.lean»", 2, 53, 1, "cLean.TSyntax.getHexNumSize"]}, {"label": "getDocString", "kind": 3, "data": diff --git a/tests/lean/run/hexnum.lean b/tests/lean/run/hexnum.lean new file mode 100644 index 0000000000..923d579f54 --- /dev/null +++ b/tests/lean/run/hexnum.lean @@ -0,0 +1,24 @@ +syntax "#" noWs hexnum : term +open Lean in +macro_rules + | `(#$n:hexnum) => `(($(quote n.getHexNumSize), $(quote n.getHexNumVal))) + +/-- info: (4, 44733) : Nat × Nat -/ +#guard_msgs in +#check #aebd + +/-- info: (1, 10) : Nat × Nat -/ +#guard_msgs in +#check #a + +/-- info: (2, 16) : Nat × Nat -/ +#guard_msgs in +#check #10 + +/-- info: (2, 5) : Nat × Nat -/ +#guard_msgs in +#check #05 + +/-- info: (3, 10) : Nat × Nat -/ +#guard_msgs in +#check #00a