fix: proper Name literals

cc @kha
This commit is contained in:
Leonardo de Moura 2020-01-24 12:36:29 -08:00
parent 4425feaebb
commit 364bb7bdf7
9 changed files with 174 additions and 81 deletions

View file

@ -515,6 +515,12 @@ s.toSubstring.trimLeft.toString
def trim (s : String) : String :=
s.toSubstring.trim.toString
@[inline] def nextWhile (s : String) (p : Char → Bool) (i : String.Pos) : String.Pos :=
Substring.takeWhileAux s s.bsize p i
@[inline] def nextUntil (s : String) (p : Char → Bool) (i : String.Pos) : String.Pos :=
nextWhile s (fun c => !p c) i
end String
protected def Char.toString (c : Char) : String :=

View file

@ -86,11 +86,11 @@ partial def toParserDescrAux : Syntax → ToParserDescrM Syntax
`(ParserDescr.symbol $(quote atom) $(quote rbp?))
| none => liftM throwUnsupportedSyntax
else if kind == `Lean.Parser.Syntax.num then
`(ParserDescr.num)
`(ParserDescr.numLit)
else if kind == `Lean.Parser.Syntax.str then
`(ParserDescr.str)
`(ParserDescr.strLit)
else if kind == `Lean.Parser.Syntax.char then
`(ParserDescr.char)
`(ParserDescr.charLit)
else if kind == `Lean.Parser.Syntax.ident then
`(ParserDescr.ident)
else if kind == `Lean.Parser.Syntax.try then do

View file

@ -830,9 +830,10 @@ fun stx _ => do
fun stx expectedType? => elabRawCharLit (stx.getArg 0) expectedType?
@[builtinTermElab quotedName] def elabQuotedName : TermElab :=
fun stx _ => match_syntax stx with
| `(`$n) => pure $ toExpr n.getId
| _ => throwUnsupportedSyntax
fun stx _ =>
match (stx.getArg 0).isNameLit? with
| some val => pure $ toExpr val
| none => throwError stx "ill-formed syntax"
end Term

View file

@ -1,40 +0,0 @@
/-
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Leonardo de Moura
-/
prelude
import Init.Data.Char.Basic
namespace Lean
def isGreek (c : Char) : Bool :=
0x391 ≤ c.val && c.val ≤ 0x3dd
def isLetterLike (c : Char) : Bool :=
(0x3b1 ≤ c.val && c.val ≤ 0x3c9 && c.val ≠ 0x3bb) || -- Lower greek, but lambda
(0x391 ≤ c.val && c.val ≤ 0x3A9 && c.val ≠ 0x3A0 && c.val ≠ 0x3A3) || -- Upper greek, but Pi and Sigma
(0x3ca ≤ c.val && c.val ≤ 0x3fb) || -- Coptic letters
(0x1f00 ≤ c.val && c.val ≤ 0x1ffe) || -- Polytonic Greek Extended Character Set
(0x2100 ≤ c.val && c.val ≤ 0x214f) || -- Letter like block
(0x1d49c ≤ c.val && c.val ≤ 0x1d59f) -- Latin letters, Script, Double-struck, Fractur
def isSubScriptAlnum (c : Char) : Bool :=
(0x2080 ≤ c.val && c.val ≤ 0x2089) || -- numeric subscripts
(0x2090 ≤ c.val && c.val ≤ 0x209c) ||
(0x1d62 ≤ c.val && c.val ≤ 0x1d6a)
def isIdFirst (c : Char) : Bool :=
c.isAlpha || c = '_' || isLetterLike c
def isIdRest (c : Char) : Bool :=
c.isAlphanum || c = '_' || c = '\'' || c == '!' || c == '?' || isLetterLike c || isSubScriptAlnum c
def idBeginEscape := '«'
def idEndEscape := '»'
def isIdBeginEscape (c : Char) : Bool :=
c = idBeginEscape
def isIdEndEscape (c : Char) : Bool :=
c = idEndEscape
end Lean

View file

@ -11,7 +11,6 @@ import Init.Lean.ToExpr
import Init.Lean.Environment
import Init.Lean.Attributes
import Init.Lean.Message
import Init.Lean.Parser.Identifier
import Init.Lean.Compiler.InitAttr
namespace Lean
@ -33,15 +32,21 @@ match stx.getOptional? with
end Syntax
namespace Parser
def isLitKind (k : SyntaxNodeKind) : Bool :=
k == strLitKind || k == numLitKind || k == charLitKind || k == nameLitKind
abbrev mkAtom (info : SourceInfo) (val : String) : Syntax :=
Syntax.atom info val
abbrev mkIdent (info : SourceInfo) (rawVal : Substring) (val : Name) : Syntax :=
Syntax.ident (some info) rawVal val []
/- Return character after position `pos` -/
def getNext (input : String) (pos : Nat) : Char :=
input.get (input.next pos)
/- Function application precedence.
In the standard lean language, only two tokens have precedence higher that `appPrec`.
- The token `.` has precedence `appPrec+1`. Thus, field accesses like `g (h x).f` are parsed as `g ((h x).f)`,
@ -830,6 +835,23 @@ partial def identFnAux (startPos : Nat) (tk : Option TokenConfig) : Name → Bas
else
mkTokenAndFixPos startPos tk c s
private def isIdFirstOrBeginEscape (c : Char) : Bool :=
isIdFirst c || isIdBeginEscape c
private def nameLitAux (startPos : Nat) : BasicParserFn
| c, s =>
let input := c.input;
let s := identFnAux startPos none Name.anonymous c (s.next input startPos);
if s.hasError then
s.mkErrorAt "invalid Name literal" startPos
else
let stx := s.stxStack.back;
match stx with
| Syntax.ident _ rawStr _ _ =>
let s := s.popSyntax;
s.pushSyntax (Syntax.node nameLitKind #[mkAtomFrom stx rawStr.toString])
| _ => s.mkError "invalid Name literal"
private def tokenFnAux : BasicParserFn
| c, s =>
let input := c.input;
@ -841,6 +863,8 @@ private def tokenFnAux : BasicParserFn
charLitFnAux i c (s.next input i)
else if curr.isDigit then
numberFnAux c s
else if curr == '`' && isIdFirstOrBeginEscape (getNext input i) then
nameLitAux i c s
else
let (_, tk) := c.tokens.matchPrefix input i;
identFnAux i tk Name.anonymous c s
@ -1092,6 +1116,16 @@ fun _ c s =>
{ fn := charLitFn,
info := mkAtomicInfo "charLit" }
def nameLitFn {k : ParserKind} : ParserFn k :=
fun _ c s =>
let iniPos := s.pos;
let s := tokenFn c s;
if s.hasError || !(s.stxStack.back.isOfKind nameLitKind) then s.mkErrorAt "Name literal" iniPos else s
@[inline] def nameLitNoAntiquot {k : ParserKind} : Parser k :=
{ fn := nameLitFn,
info := mkAtomicInfo "nameLit" }
def identFn {k : ParserKind} : ParserFn k :=
fun _ c s =>
let iniPos := s.pos;
@ -1130,7 +1164,7 @@ def unquotedSymbolFn {k : ParserKind} : ParserFn k :=
fun _ c s =>
let iniPos := s.pos;
let s := tokenFn c s;
if s.hasError || s.stxStack.back.isIdent || s.stxStack.back.isOfKind strLitKind || s.stxStack.back.isOfKind charLitKind || s.stxStack.back.isOfKind numLitKind then
if s.hasError || s.stxStack.back.isIdent || isLitKind s.stxStack.back.getKind then
s.mkErrorAt "symbol" iniPos
else
s
@ -1324,7 +1358,11 @@ match stx? with
| _ => (s, 0)
| some (Syntax.ident _ _ _ _) => (s, appPrec)
-- TODO(Leo): add support for associating lbp with syntax node kinds.
| some (Syntax.node k _) => if k == numLitKind || k == charLitKind || k == strLitKind || k == fieldIdxKind then (s, appPrec) else (s, 0)
| some (Syntax.node k _) =>
if isLitKind k || k == fieldIdxKind then
(s, appPrec)
else
(s, 0)
| _ => (s, 0)
def indexed {α : Type} (map : TokenMap α) (c : ParserContext) (s : ParserState) (leadingIdentAsSymbol : Bool) : ParserState × List α :=
@ -1486,6 +1524,9 @@ mkAntiquot "strLit" strLitKind <|> strLitNoAntiquot
def charLit {k : ParserKind} : Parser k :=
mkAntiquot "charLit" charLitKind <|> charLitNoAntiquot
def nameLit {k : ParserKind} : Parser k :=
mkAntiquot "nameLit" nameLitKind <|> nameLitNoAntiquot
def categoryParserOfStackFn (offset : Nat) : ParserFn leading :=
fun rbp ctx s =>
let stack := s.stxStack;
@ -1659,9 +1700,10 @@ def compileParserDescr (categories : ParserCategories) : forall {k : ParserKind}
| _, ParserDescr.sepBy1 d₁ d₂ => sepBy1 <$> compileParserDescr d₁ <*> compileParserDescr d₂
| _, ParserDescr.node k d => node k <$> compileParserDescr d
| _, ParserDescr.symbol tk lbp => pure $ symbolAux tk lbp
| _, ParserDescr.num => pure $ numLit
| _, ParserDescr.str => pure $ strLit
| _, ParserDescr.char => pure $ charLit
| _, ParserDescr.numLit => pure $ numLit
| _, ParserDescr.strLit => pure $ strLit
| _, ParserDescr.charLit => pure $ charLit
| _, ParserDescr.nameLit => pure $ nameLit
| _, ParserDescr.ident => pure $ ident
| ParserKind.leading,
ParserDescr.nonReservedSymbol tk includeIdent => pure $ nonReservedSymbol tk includeIdent
@ -1783,7 +1825,7 @@ parserExtension.addEntry env $ ParserExtensionEntry.kind k
def isValidSyntaxNodeKind (env : Environment) (k : SyntaxNodeKind) : Bool :=
let kinds := (parserExtension.getState env).kinds;
kinds.contains k || k == choiceKind || k == strLitKind || k == numLitKind || k == charLitKind
kinds.contains k || k == choiceKind || isLitKind k
def getSyntaxNodeKinds (env : Environment) : List SyntaxNodeKind := do
let kinds := (parserExtension.getState env).kinds;

View file

@ -85,7 +85,7 @@ def matchAlt := parser! " | " >> sepBy1 termParser ", " >> darrow >> termParser
@[builtinTermParser] def «parser!» := parser! "parser! " >> termParser
@[builtinTermParser] def «tparser!» := parser! "tparser! " >> termParser
@[builtinTermParser] def borrowed := parser! symbol "@&" appPrec >> termParser (appPrec - 1)
@[builtinTermParser] def quotedName := parser! symbol "`" appPrec >> rawIdent
@[builtinTermParser] def quotedName := parser! nameLit
-- NOTE: syntax quotations are defined in Init.Lean.Parser.Command
@[builtinTermParser] def antiquot := (mkAntiquot "term" none true : Parser)
@[builtinTermParser] def «match_syntax» := parser! "match_syntax" >> termParser >> " with " >> many1Indent matchAlt "'match_syntax' alternatives must be indented"

View file

@ -21,6 +21,36 @@ without importing the whole `Lean` module.
It also allow us to use extensions to develop the `Init` library.
-/
/- Valid identifier names -/
def isGreek (c : Char) : Bool :=
0x391 ≤ c.val && c.val ≤ 0x3dd
def isLetterLike (c : Char) : Bool :=
(0x3b1 ≤ c.val && c.val ≤ 0x3c9 && c.val ≠ 0x3bb) || -- Lower greek, but lambda
(0x391 ≤ c.val && c.val ≤ 0x3A9 && c.val ≠ 0x3A0 && c.val ≠ 0x3A3) || -- Upper greek, but Pi and Sigma
(0x3ca ≤ c.val && c.val ≤ 0x3fb) || -- Coptic letters
(0x1f00 ≤ c.val && c.val ≤ 0x1ffe) || -- Polytonic Greek Extended Character Set
(0x2100 ≤ c.val && c.val ≤ 0x214f) || -- Letter like block
(0x1d49c ≤ c.val && c.val ≤ 0x1d59f) -- Latin letters, Script, Double-struck, Fractur
def isSubScriptAlnum (c : Char) : Bool :=
(0x2080 ≤ c.val && c.val ≤ 0x2089) || -- numeric subscripts
(0x2090 ≤ c.val && c.val ≤ 0x209c) ||
(0x1d62 ≤ c.val && c.val ≤ 0x1d6a)
def isIdFirst (c : Char) : Bool :=
c.isAlpha || c = '_' || isLetterLike c
def isIdRest (c : Char) : Bool :=
c.isAlphanum || c = '_' || c = '\'' || c == '!' || c == '?' || isLetterLike c || isSubScriptAlnum c
def idBeginEscape := '«'
def idEndEscape := '»'
def isIdBeginEscape (c : Char) : Bool :=
c = idBeginEscape
def isIdEndEscape (c : Char) : Bool :=
c = idEndEscape
/- Hierarchical names -/
inductive Name
| anonymous : Name
@ -121,9 +151,10 @@ inductive ParserDescrCore : ParserKind → Type
| node {k : ParserKind} : Name → ParserDescrCore k → ParserDescrCore k
| symbol {k : ParserKind} : String → Option Nat → ParserDescrCore k
| nonReservedSymbol : String → Bool → ParserDescrCore ParserKind.leading
| num {k : ParserKind} : ParserDescrCore k
| str {k : ParserKind} : ParserDescrCore k
| char {k : ParserKind} : ParserDescrCore k
| numLit {k : ParserKind} : ParserDescrCore k
| strLit {k : ParserKind} : ParserDescrCore k
| charLit {k : ParserKind} : ParserDescrCore k
| nameLit {k : ParserKind} : ParserDescrCore k
| ident {k : ParserKind} : ParserDescrCore k
| pushLeading : ParserDescrCore ParserKind.trailing
| parser {k : ParserKind} : Name → Nat → ParserDescrCore k
@ -144,9 +175,10 @@ abbrev TrailingParserDescr := ParserDescrCore ParserKind.trailing
@[matchPattern] abbrev ParserDescr.sepBy1 := @ParserDescrCore.sepBy1
@[matchPattern] abbrev ParserDescr.node := @ParserDescrCore.node
@[matchPattern] abbrev ParserDescr.symbol := @ParserDescrCore.symbol
@[matchPattern] abbrev ParserDescr.num := @ParserDescrCore.num
@[matchPattern] abbrev ParserDescr.str := @ParserDescrCore.str
@[matchPattern] abbrev ParserDescr.char := @ParserDescrCore.char
@[matchPattern] abbrev ParserDescr.numLit := @ParserDescrCore.numLit
@[matchPattern] abbrev ParserDescr.strLit := @ParserDescrCore.strLit
@[matchPattern] abbrev ParserDescr.charLit := @ParserDescrCore.charLit
@[matchPattern] abbrev ParserDescr.nameLit := @ParserDescrCore.nameLit
@[matchPattern] abbrev ParserDescr.ident := @ParserDescrCore.ident
@[matchPattern] abbrev ParserDescr.nonReservedSymbol := @ParserDescrCore.nonReservedSymbol
@[matchPattern] abbrev ParserDescr.pushLeading := @ParserDescrCore.pushLeading
@ -373,6 +405,7 @@ abbrev Macro := Syntax → MacroM Syntax
def strLitKind : SyntaxNodeKind := `strLit
def charLitKind : SyntaxNodeKind := `charLit
def numLitKind : SyntaxNodeKind := `numLit
def nameLitKind : SyntaxNodeKind := `nameLit
def fieldIdxKind : SyntaxNodeKind := `fieldIdx
/- Helper functions for processing Syntax programmatically -/
@ -524,16 +557,22 @@ else
else if c.isDigit then decodeDecimalLitAux s 0 0
else none
def isNatLitAux (nodeKind : SyntaxNodeKind) : Syntax → Option Nat
| Syntax.node k args =>
if k == nodeKind && args.size == 1 then
def isLit? (litKind : SyntaxNodeKind) (stx : Syntax) : Option String :=
match stx with
| Syntax.node k args =>
if k == litKind && args.size == 1 then
match args.get! 0 with
| (Syntax.atom _ val) => decodeNatLitVal val
| (Syntax.atom _ val) => some val
| _ => none
else
none
| _ => none
def isNatLitAux (litKind : SyntaxNodeKind) (stx : Syntax) : Option Nat :=
match isLit? litKind stx with
| some val => decodeNatLitVal val
| _ => none
def isNatLit? (s : Syntax) : Option Nat :=
isNatLitAux numLitKind s
@ -586,15 +625,10 @@ partial def decodeStrLitAux (s : String) : String.Pos → String → Option Stri
def decodeStrLit (s : String) : Option String :=
decodeStrLitAux s 1 ""
def isStrLit? : Syntax → Option String
| Syntax.node k args =>
if k == strLitKind && args.size == 1 then
match args.get! 0 with
| (Syntax.atom _ val) => decodeStrLit val
| _ => none
else
none
| _ => none
def isStrLit? (stx : Syntax) : Option String :=
match isLit? strLitKind stx with
| some val => decodeStrLit val
| _ => none
def decodeCharLit (s : String) : Option Char :=
let c := s.get 1;
@ -604,15 +638,43 @@ if c == '\\' then do
else
pure c
def isCharLit? : Syntax → Option Char
| Syntax.node k args =>
if k == charLitKind && args.size == 1 then
match args.get! 0 with
| (Syntax.atom _ val) => decodeCharLit val
| _ => none
def isCharLit? (stx : Syntax) : Option Char :=
match isLit? charLitKind stx with
| some val => decodeCharLit val
| _ => none
private partial def decodeNameLitAux (s : String) : Nat → Name → Option Name
| i, r =>
let continue? (i : Nat) (r : Name) : Option Name :=
if s.get i == '.' then
decodeNameLitAux (s.next i) r
else if s.atEnd i then
pure r
else
none;
let curr := s.get i;
if isIdBeginEscape curr then
let startPart := s.next i;
let stopPart := s.nextUntil isIdEndEscape startPart;
if !isIdEndEscape (s.get stopPart) then none
else continue? (s.next stopPart) (mkNameStr r (s.extract startPart stopPart))
else if isIdFirst curr then
let startPart := i;
let stopPart := s.nextWhile isIdRest startPart;
continue? stopPart (mkNameStr r (s.extract startPart stopPart))
else
none
| _ => none
def decodeNameLit (s : String) : Option Name :=
if s.get 0 == '`' then
decodeNameLitAux s 1 Name.anonymous
else
none
def isNameLit? (stx : Syntax) : Option Name :=
match isLit? nameLitKind stx with
| some val => decodeNameLit val
| _ => none
def hasArgs : Syntax → Bool
| Syntax.node _ args => args.size > 0

14
tests/lean/namelit.lean Normal file
View file

@ -0,0 +1,14 @@
new_frontend
#check `foo
#check `foo.bla
#check `«foo bla»
#check `«foo bla».«hello world»
#check `«foo bla».boo.«hello world»
#check `foo.«hello»
macro dummy1 : term => `(`hello)
macro dummy2 : term => `(`hello.«world !!!»)
#check dummy1
#check dummy2

View file

@ -0,0 +1,8 @@
Lean.mkNameStr Lean.Name.anonymous "foo" : Lean.Name
Lean.mkNameStr (Lean.mkNameStr Lean.Name.anonymous "foo") "bla" : Lean.Name
Lean.mkNameStr Lean.Name.anonymous "foo bla" : Lean.Name
Lean.mkNameStr (Lean.mkNameStr Lean.Name.anonymous "foo bla") "hello world" : Lean.Name
Lean.mkNameStr (Lean.mkNameStr (Lean.mkNameStr Lean.Name.anonymous "foo bla") "boo") "hello world" : Lean.Name
Lean.mkNameStr (Lean.mkNameStr Lean.Name.anonymous "foo") "hello" : Lean.Name
Lean.mkNameStr Lean.Name.anonymous "hello" : Lean.Name
Lean.mkNameStr (Lean.mkNameStr Lean.Name.anonymous "hello") "world !!!" : Lean.Name