fix: liberalize rules for atoms by allowing leading '' (#6114)

This PR liberalizes atom rules by allowing `''` to be a prefix of an
atom, after #6012 only added an exception for `''` alone, and also adds
some unit tests for atom validation.
This commit is contained in:
David Thrane Christiansen 2024-11-18 11:19:20 +01:00 committed by GitHub
parent 5a99cb326c
commit b8d6e44c4f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 140 additions and 1 deletions

View file

@ -236,8 +236,9 @@ where
-- Pretty-printing instructions shouldn't affect validity
let s := s.trim
!s.isEmpty &&
(s.front != '\'' || s == "''") &&
(s.front != '\'' || "''".isPrefixOf s) &&
s.front != '\"' &&
!(isIdBeginEscape s.front) &&
!(s.front == '`' && (s.endPos == ⟨1⟩ || isIdFirst (s.get ⟨1⟩) || isIdBeginEscape (s.get ⟨1⟩))) &&
!s.front.isDigit &&
!(s.any Char.isWhitespace)

View file

@ -0,0 +1,138 @@
import Lean.Elab.Command
import Lean.Elab.Syntax
open Lean.Elab.Term.toParserDescr (isValidAtom)
open Lean Elab Command
/-!
Test various classes of atoms that should be allowed or not.
-/
def test (expected : Bool) (strings : List String) : CommandElabM Unit := Id.run do
let mut wrong : List String := []
for s in strings do
if isValidAtom s != expected then
wrong := s :: wrong
if isValidAtom (" " ++ s) != expected then
wrong := s!"{s} (with leading whitespace)" :: wrong
if isValidAtom (s ++ " ") != expected then
wrong := s!"{s} (with trailing whitespace)" :: wrong
if isValidAtom (" " ++ s ++ " ") != expected then
wrong := s!"{s} (with leading and trailing whitespace)" :: wrong
if wrong.isEmpty then
logInfo <| "All " ++ if expected then "accepted" else "rejected"
else
logError <|
s!"The following atoms should be {if expected then "" else "in"}valid but are not:\n" ++
String.join (wrong.reverse.map (s! " * {·}\n"))
syntax "#test_valid" term : command
syntax "#test_invalid" term : command
macro_rules
| `(#test_valid%$tok $t) => `(#eval%$tok test true $t)
| `(#test_invalid%$tok $t) => `(#eval%$tok test false $t)
/-!
# No Empty Atoms
-/
/-- info: All rejected -/
#guard_msgs in
#test_invalid [""]
/-!
# Preventing Character Literal Confusion
Atoms are required to be suitably unlike character literals. This means that if they start with a
single quote, the next character must also be a single quote.
Two single quotes (and variations on it) has long-term usage as an infix operator in Mathlib.
-/
/-- info: All accepted -/
#guard_msgs in
#test_valid ["if", "''", "''ᵁ", "if'", "x'y'z", "x''y"]
/-- info: All rejected -/
#guard_msgs in
#test_invalid ["'x'", "'ᵁ", "'c", "'no'", "'"]
/-!
# No Internal Whitespace
-/
/-- info: All rejected -/
#guard_msgs in
#test_invalid ["open mixed", "open mixed"]
/-!
# No Confusion with String Literals
Internal double quotes are allowed.
-/
/-- info: All accepted -/
#guard_msgs in
#test_valid ["what\"string\"is_this?"]
/-- info: All rejected -/
#guard_msgs in
#test_invalid ["\"","\"\"", "\"f\""]
/-!
# No Confusion with Escaped Identifiers
This doesn't confuse the parser, but it may confuse a user if they can define an atom that can never
be parsed.
-/
/-- info: All accepted -/
#guard_msgs in
#test_valid ["prefix«", "prefix«test", "prefix«test»", "prefix«test»foo"]
/-- info: All rejected -/
#guard_msgs in
#test_invalid ["«", "«test", "«test»", "«test»foo"]
/-!
# No Confusion with Name Literals
The first two characters of an atom may not be a valid start of a name literal
-/
/-- info: All accepted -/
#guard_msgs in
#test_valid ["``", "`!", "x`"]
/-!
The next set all begin with U0x2035, REVERSED PRIME, rather than back-tick, and are thus accepted.
-/
/-- info: All accepted -/
#guard_msgs in
#test_valid ["", "x", "‵«", "xyz", "«x.y", "«x.y.z»"]
/-- info: All rejected -/
#guard_msgs in
#test_invalid ["`", "`x", "`«", "`xyz", "`«x.y", "`«x.y.z»"]
/-!
# No Leading Digits
-/
/-- info: All accepted -/
#guard_msgs in
#test_valid ["prefix5", "prefix22test", "prefix1test0", "prefix8test8foo"]
/-- info: All rejected -/
#guard_msgs in
#test_invalid ["0", "1test", "0test3"]