From b8d6e44c4f65c9a38f2ebc8e2d1691575b53e761 Mon Sep 17 00:00:00 2001 From: David Thrane Christiansen Date: Mon, 18 Nov 2024 11:19:20 +0100 Subject: [PATCH] fix: liberalize rules for atoms by allowing leading '' (#6114) This PR liberalizes atom rules by allowing `''` to be a prefix of an atom, after #6012 only added an exception for `''` alone, and also adds some unit tests for atom validation. --- src/Lean/Elab/Syntax.lean | 3 +- tests/lean/run/atomValidation.lean | 138 +++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 tests/lean/run/atomValidation.lean diff --git a/src/Lean/Elab/Syntax.lean b/src/Lean/Elab/Syntax.lean index 29bf4adc2e..44b9fa25a0 100644 --- a/src/Lean/Elab/Syntax.lean +++ b/src/Lean/Elab/Syntax.lean @@ -236,8 +236,9 @@ where -- Pretty-printing instructions shouldn't affect validity let s := s.trim !s.isEmpty && - (s.front != '\'' || s == "''") && + (s.front != '\'' || "''".isPrefixOf s) && s.front != '\"' && + !(isIdBeginEscape s.front) && !(s.front == '`' && (s.endPos == ⟨1⟩ || isIdFirst (s.get ⟨1⟩) || isIdBeginEscape (s.get ⟨1⟩))) && !s.front.isDigit && !(s.any Char.isWhitespace) diff --git a/tests/lean/run/atomValidation.lean b/tests/lean/run/atomValidation.lean new file mode 100644 index 0000000000..11e5aa1139 --- /dev/null +++ b/tests/lean/run/atomValidation.lean @@ -0,0 +1,138 @@ +import Lean.Elab.Command +import Lean.Elab.Syntax + +open Lean.Elab.Term.toParserDescr (isValidAtom) +open Lean Elab Command + +/-! +Test various classes of atoms that should be allowed or not. +-/ + +def test (expected : Bool) (strings : List String) : CommandElabM Unit := Id.run do + let mut wrong : List String := [] + for s in strings do + if isValidAtom s != expected then + wrong := s :: wrong + if isValidAtom (" " ++ s) != expected then + wrong := s!"{s} (with leading whitespace)" :: wrong + if isValidAtom (s ++ " ") != expected then + wrong := s!"{s} (with trailing whitespace)" :: wrong + if isValidAtom (" " ++ s ++ " ") != expected then + wrong := s!"{s} (with leading and trailing whitespace)" :: wrong + + if wrong.isEmpty then + logInfo <| "All " ++ if expected then "accepted" else "rejected" + else + logError <| + s!"The following atoms should be {if expected then "" else "in"}valid but are not:\n" ++ + String.join (wrong.reverse.map (s! " * {·}\n")) + + +syntax "#test_valid" term : command +syntax "#test_invalid" term : command + +macro_rules + | `(#test_valid%$tok $t) => `(#eval%$tok test true $t) + | `(#test_invalid%$tok $t) => `(#eval%$tok test false $t) + + +/-! +# No Empty Atoms +-/ + +/-- info: All rejected -/ +#guard_msgs in +#test_invalid [""] + + +/-! +# Preventing Character Literal Confusion + +Atoms are required to be suitably unlike character literals. This means that if they start with a +single quote, the next character must also be a single quote. + +Two single quotes (and variations on it) has long-term usage as an infix operator in Mathlib. +-/ + +/-- info: All accepted -/ +#guard_msgs in +#test_valid ["if", "''", "''ᵁ", "if'", "x'y'z", "x''y"] + +/-- info: All rejected -/ +#guard_msgs in +#test_invalid ["'x'", "'ᵁ", "'c", "'no'", "'"] + + +/-! +# No Internal Whitespace +-/ + +/-- info: All rejected -/ +#guard_msgs in +#test_invalid ["open mixed", "open mixed"] + + +/-! +# No Confusion with String Literals + +Internal double quotes are allowed. +-/ + +/-- info: All accepted -/ +#guard_msgs in +#test_valid ["what\"string\"is_this?"] + +/-- info: All rejected -/ +#guard_msgs in +#test_invalid ["\"","\"\"", "\"f\""] + +/-! +# No Confusion with Escaped Identifiers + +This doesn't confuse the parser, but it may confuse a user if they can define an atom that can never +be parsed. +-/ + +/-- info: All accepted -/ +#guard_msgs in +#test_valid ["prefix«", "prefix«test", "prefix«test»", "prefix«test»foo"] + +/-- info: All rejected -/ +#guard_msgs in +#test_invalid ["«", "«test", "«test»", "«test»foo"] + + +/-! +# No Confusion with Name Literals + +The first two characters of an atom may not be a valid start of a name literal +-/ + +/-- info: All accepted -/ +#guard_msgs in +#test_valid ["``", "`!", "x`"] + +/-! +The next set all begin with U0x2035, REVERSED PRIME, rather than back-tick, and are thus accepted. +-/ +/-- info: All accepted -/ +#guard_msgs in +#test_valid ["‵", "‵x", "‵«", "‵xyz", "‵«x.y", "‵«x.y.z»"] + + +/-- info: All rejected -/ +#guard_msgs in +#test_invalid ["`", "`x", "`«", "`xyz", "`«x.y", "`«x.y.z»"] + + +/-! +# No Leading Digits +-/ + +/-- info: All accepted -/ +#guard_msgs in +#test_valid ["prefix5", "prefix22test", "prefix1test0", "prefix8test8foo"] + +/-- info: All rejected -/ +#guard_msgs in +#test_invalid ["0", "1test", "0test3"]