fix: liberalize rules for atoms by allowing leading '' (#6114)
This PR liberalizes atom rules by allowing `''` to be a prefix of an atom, after #6012 only added an exception for `''` alone, and also adds some unit tests for atom validation.
This commit is contained in:
parent
5a99cb326c
commit
b8d6e44c4f
2 changed files with 140 additions and 1 deletions
|
|
@ -236,8 +236,9 @@ where
|
|||
-- Pretty-printing instructions shouldn't affect validity
|
||||
let s := s.trim
|
||||
!s.isEmpty &&
|
||||
(s.front != '\'' || s == "''") &&
|
||||
(s.front != '\'' || "''".isPrefixOf s) &&
|
||||
s.front != '\"' &&
|
||||
!(isIdBeginEscape s.front) &&
|
||||
!(s.front == '`' && (s.endPos == ⟨1⟩ || isIdFirst (s.get ⟨1⟩) || isIdBeginEscape (s.get ⟨1⟩))) &&
|
||||
!s.front.isDigit &&
|
||||
!(s.any Char.isWhitespace)
|
||||
|
|
|
|||
138
tests/lean/run/atomValidation.lean
Normal file
138
tests/lean/run/atomValidation.lean
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
import Lean.Elab.Command
|
||||
import Lean.Elab.Syntax
|
||||
|
||||
open Lean.Elab.Term.toParserDescr (isValidAtom)
|
||||
open Lean Elab Command
|
||||
|
||||
/-!
|
||||
Test various classes of atoms that should be allowed or not.
|
||||
-/
|
||||
|
||||
def test (expected : Bool) (strings : List String) : CommandElabM Unit := Id.run do
|
||||
let mut wrong : List String := []
|
||||
for s in strings do
|
||||
if isValidAtom s != expected then
|
||||
wrong := s :: wrong
|
||||
if isValidAtom (" " ++ s) != expected then
|
||||
wrong := s!"{s} (with leading whitespace)" :: wrong
|
||||
if isValidAtom (s ++ " ") != expected then
|
||||
wrong := s!"{s} (with trailing whitespace)" :: wrong
|
||||
if isValidAtom (" " ++ s ++ " ") != expected then
|
||||
wrong := s!"{s} (with leading and trailing whitespace)" :: wrong
|
||||
|
||||
if wrong.isEmpty then
|
||||
logInfo <| "All " ++ if expected then "accepted" else "rejected"
|
||||
else
|
||||
logError <|
|
||||
s!"The following atoms should be {if expected then "" else "in"}valid but are not:\n" ++
|
||||
String.join (wrong.reverse.map (s! " * {·}\n"))
|
||||
|
||||
|
||||
syntax "#test_valid" term : command
|
||||
syntax "#test_invalid" term : command
|
||||
|
||||
macro_rules
|
||||
| `(#test_valid%$tok $t) => `(#eval%$tok test true $t)
|
||||
| `(#test_invalid%$tok $t) => `(#eval%$tok test false $t)
|
||||
|
||||
|
||||
/-!
|
||||
# No Empty Atoms
|
||||
-/
|
||||
|
||||
/-- info: All rejected -/
|
||||
#guard_msgs in
|
||||
#test_invalid [""]
|
||||
|
||||
|
||||
/-!
|
||||
# Preventing Character Literal Confusion
|
||||
|
||||
Atoms are required to be suitably unlike character literals. This means that if they start with a
|
||||
single quote, the next character must also be a single quote.
|
||||
|
||||
Two single quotes (and variations on it) has long-term usage as an infix operator in Mathlib.
|
||||
-/
|
||||
|
||||
/-- info: All accepted -/
|
||||
#guard_msgs in
|
||||
#test_valid ["if", "''", "''ᵁ", "if'", "x'y'z", "x''y"]
|
||||
|
||||
/-- info: All rejected -/
|
||||
#guard_msgs in
|
||||
#test_invalid ["'x'", "'ᵁ", "'c", "'no'", "'"]
|
||||
|
||||
|
||||
/-!
|
||||
# No Internal Whitespace
|
||||
-/
|
||||
|
||||
/-- info: All rejected -/
|
||||
#guard_msgs in
|
||||
#test_invalid ["open mixed", "open mixed"]
|
||||
|
||||
|
||||
/-!
|
||||
# No Confusion with String Literals
|
||||
|
||||
Internal double quotes are allowed.
|
||||
-/
|
||||
|
||||
/-- info: All accepted -/
|
||||
#guard_msgs in
|
||||
#test_valid ["what\"string\"is_this?"]
|
||||
|
||||
/-- info: All rejected -/
|
||||
#guard_msgs in
|
||||
#test_invalid ["\"","\"\"", "\"f\""]
|
||||
|
||||
/-!
|
||||
# No Confusion with Escaped Identifiers
|
||||
|
||||
This doesn't confuse the parser, but it may confuse a user if they can define an atom that can never
|
||||
be parsed.
|
||||
-/
|
||||
|
||||
/-- info: All accepted -/
|
||||
#guard_msgs in
|
||||
#test_valid ["prefix«", "prefix«test", "prefix«test»", "prefix«test»foo"]
|
||||
|
||||
/-- info: All rejected -/
|
||||
#guard_msgs in
|
||||
#test_invalid ["«", "«test", "«test»", "«test»foo"]
|
||||
|
||||
|
||||
/-!
|
||||
# No Confusion with Name Literals
|
||||
|
||||
The first two characters of an atom may not be a valid start of a name literal
|
||||
-/
|
||||
|
||||
/-- info: All accepted -/
|
||||
#guard_msgs in
|
||||
#test_valid ["``", "`!", "x`"]
|
||||
|
||||
/-!
|
||||
The next set all begin with U0x2035, REVERSED PRIME, rather than back-tick, and are thus accepted.
|
||||
-/
|
||||
/-- info: All accepted -/
|
||||
#guard_msgs in
|
||||
#test_valid ["‵", "‵x", "‵«", "‵xyz", "‵«x.y", "‵«x.y.z»"]
|
||||
|
||||
|
||||
/-- info: All rejected -/
|
||||
#guard_msgs in
|
||||
#test_invalid ["`", "`x", "`«", "`xyz", "`«x.y", "`«x.y.z»"]
|
||||
|
||||
|
||||
/-!
|
||||
# No Leading Digits
|
||||
-/
|
||||
|
||||
/-- info: All accepted -/
|
||||
#guard_msgs in
|
||||
#test_valid ["prefix5", "prefix22test", "prefix1test0", "prefix8test8foo"]
|
||||
|
||||
/-- info: All rejected -/
|
||||
#guard_msgs in
|
||||
#test_invalid ["0", "1test", "0test3"]
|
||||
Loading…
Add table
Reference in a new issue