diff --git a/src/Init/Data/String/Basic.lean b/src/Init/Data/String/Basic.lean index 9673d385ea..c5309c9aa1 100644 --- a/src/Init/Data/String/Basic.lean +++ b/src/Init/Data/String/Basic.lean @@ -515,6 +515,12 @@ s.toSubstring.trimLeft.toString def trim (s : String) : String := s.toSubstring.trim.toString +@[inline] def nextWhile (s : String) (p : Char → Bool) (i : String.Pos) : String.Pos := +Substring.takeWhileAux s s.bsize p i + +@[inline] def nextUntil (s : String) (p : Char → Bool) (i : String.Pos) : String.Pos := +nextWhile s (fun c => !p c) i + end String protected def Char.toString (c : Char) : String := diff --git a/src/Init/Lean/Elab/Syntax.lean b/src/Init/Lean/Elab/Syntax.lean index 60599042e5..ccd725db13 100644 --- a/src/Init/Lean/Elab/Syntax.lean +++ b/src/Init/Lean/Elab/Syntax.lean @@ -86,11 +86,11 @@ partial def toParserDescrAux : Syntax → ToParserDescrM Syntax `(ParserDescr.symbol $(quote atom) $(quote rbp?)) | none => liftM throwUnsupportedSyntax else if kind == `Lean.Parser.Syntax.num then - `(ParserDescr.num) + `(ParserDescr.numLit) else if kind == `Lean.Parser.Syntax.str then - `(ParserDescr.str) + `(ParserDescr.strLit) else if kind == `Lean.Parser.Syntax.char then - `(ParserDescr.char) + `(ParserDescr.charLit) else if kind == `Lean.Parser.Syntax.ident then `(ParserDescr.ident) else if kind == `Lean.Parser.Syntax.try then do diff --git a/src/Init/Lean/Elab/Term.lean b/src/Init/Lean/Elab/Term.lean index 22785e628d..59acf6b675 100644 --- a/src/Init/Lean/Elab/Term.lean +++ b/src/Init/Lean/Elab/Term.lean @@ -830,9 +830,10 @@ fun stx _ => do fun stx expectedType? => elabRawCharLit (stx.getArg 0) expectedType? @[builtinTermElab quotedName] def elabQuotedName : TermElab := -fun stx _ => match_syntax stx with -| `(`$n) => pure $ toExpr n.getId -| _ => throwUnsupportedSyntax +fun stx _ => + match (stx.getArg 0).isNameLit? with + | some val => pure $ toExpr val + | none => throwError stx "ill-formed syntax" end Term diff --git a/src/Init/Lean/Parser/Identifier.lean b/src/Init/Lean/Parser/Identifier.lean deleted file mode 100644 index 74b313723c..0000000000 --- a/src/Init/Lean/Parser/Identifier.lean +++ /dev/null @@ -1,40 +0,0 @@ -/- -Copyright (c) 2018 Microsoft Corporation. All rights reserved. -Released under Apache 2.0 license as described in the file LICENSE. -Authors: Leonardo de Moura --/ -prelude -import Init.Data.Char.Basic - -namespace Lean - -def isGreek (c : Char) : Bool := -0x391 ≤ c.val && c.val ≤ 0x3dd - -def isLetterLike (c : Char) : Bool := -(0x3b1 ≤ c.val && c.val ≤ 0x3c9 && c.val ≠ 0x3bb) || -- Lower greek, but lambda -(0x391 ≤ c.val && c.val ≤ 0x3A9 && c.val ≠ 0x3A0 && c.val ≠ 0x3A3) || -- Upper greek, but Pi and Sigma -(0x3ca ≤ c.val && c.val ≤ 0x3fb) || -- Coptic letters -(0x1f00 ≤ c.val && c.val ≤ 0x1ffe) || -- Polytonic Greek Extended Character Set -(0x2100 ≤ c.val && c.val ≤ 0x214f) || -- Letter like block -(0x1d49c ≤ c.val && c.val ≤ 0x1d59f) -- Latin letters, Script, Double-struck, Fractur - -def isSubScriptAlnum (c : Char) : Bool := -(0x2080 ≤ c.val && c.val ≤ 0x2089) || -- numeric subscripts -(0x2090 ≤ c.val && c.val ≤ 0x209c) || -(0x1d62 ≤ c.val && c.val ≤ 0x1d6a) - -def isIdFirst (c : Char) : Bool := -c.isAlpha || c = '_' || isLetterLike c - -def isIdRest (c : Char) : Bool := -c.isAlphanum || c = '_' || c = '\'' || c == '!' || c == '?' || isLetterLike c || isSubScriptAlnum c - -def idBeginEscape := '«' -def idEndEscape := '»' -def isIdBeginEscape (c : Char) : Bool := -c = idBeginEscape -def isIdEndEscape (c : Char) : Bool := -c = idEndEscape - -end Lean diff --git a/src/Init/Lean/Parser/Parser.lean b/src/Init/Lean/Parser/Parser.lean index 96ca794e6a..ff653c09d7 100644 --- a/src/Init/Lean/Parser/Parser.lean +++ b/src/Init/Lean/Parser/Parser.lean @@ -11,7 +11,6 @@ import Init.Lean.ToExpr import Init.Lean.Environment import Init.Lean.Attributes import Init.Lean.Message -import Init.Lean.Parser.Identifier import Init.Lean.Compiler.InitAttr namespace Lean @@ -33,15 +32,21 @@ match stx.getOptional? with end Syntax - namespace Parser +def isLitKind (k : SyntaxNodeKind) : Bool := +k == strLitKind || k == numLitKind || k == charLitKind || k == nameLitKind + abbrev mkAtom (info : SourceInfo) (val : String) : Syntax := Syntax.atom info val abbrev mkIdent (info : SourceInfo) (rawVal : Substring) (val : Name) : Syntax := Syntax.ident (some info) rawVal val [] +/- Return character after position `pos` -/ +def getNext (input : String) (pos : Nat) : Char := +input.get (input.next pos) + /- Function application precedence. In the standard lean language, only two tokens have precedence higher that `appPrec`. - The token `.` has precedence `appPrec+1`. Thus, field accesses like `g (h x).f` are parsed as `g ((h x).f)`, @@ -830,6 +835,23 @@ partial def identFnAux (startPos : Nat) (tk : Option TokenConfig) : Name → Bas else mkTokenAndFixPos startPos tk c s +private def isIdFirstOrBeginEscape (c : Char) : Bool := +isIdFirst c || isIdBeginEscape c + +private def nameLitAux (startPos : Nat) : BasicParserFn +| c, s => + let input := c.input; + let s := identFnAux startPos none Name.anonymous c (s.next input startPos); + if s.hasError then + s.mkErrorAt "invalid Name literal" startPos + else + let stx := s.stxStack.back; + match stx with + | Syntax.ident _ rawStr _ _ => + let s := s.popSyntax; + s.pushSyntax (Syntax.node nameLitKind #[mkAtomFrom stx rawStr.toString]) + | _ => s.mkError "invalid Name literal" + private def tokenFnAux : BasicParserFn | c, s => let input := c.input; @@ -841,6 +863,8 @@ private def tokenFnAux : BasicParserFn charLitFnAux i c (s.next input i) else if curr.isDigit then numberFnAux c s + else if curr == '`' && isIdFirstOrBeginEscape (getNext input i) then + nameLitAux i c s else let (_, tk) := c.tokens.matchPrefix input i; identFnAux i tk Name.anonymous c s @@ -1092,6 +1116,16 @@ fun _ c s => { fn := charLitFn, info := mkAtomicInfo "charLit" } +def nameLitFn {k : ParserKind} : ParserFn k := +fun _ c s => + let iniPos := s.pos; + let s := tokenFn c s; + if s.hasError || !(s.stxStack.back.isOfKind nameLitKind) then s.mkErrorAt "Name literal" iniPos else s + +@[inline] def nameLitNoAntiquot {k : ParserKind} : Parser k := +{ fn := nameLitFn, + info := mkAtomicInfo "nameLit" } + def identFn {k : ParserKind} : ParserFn k := fun _ c s => let iniPos := s.pos; @@ -1130,7 +1164,7 @@ def unquotedSymbolFn {k : ParserKind} : ParserFn k := fun _ c s => let iniPos := s.pos; let s := tokenFn c s; - if s.hasError || s.stxStack.back.isIdent || s.stxStack.back.isOfKind strLitKind || s.stxStack.back.isOfKind charLitKind || s.stxStack.back.isOfKind numLitKind then + if s.hasError || s.stxStack.back.isIdent || isLitKind s.stxStack.back.getKind then s.mkErrorAt "symbol" iniPos else s @@ -1324,7 +1358,11 @@ match stx? with | _ => (s, 0) | some (Syntax.ident _ _ _ _) => (s, appPrec) -- TODO(Leo): add support for associating lbp with syntax node kinds. -| some (Syntax.node k _) => if k == numLitKind || k == charLitKind || k == strLitKind || k == fieldIdxKind then (s, appPrec) else (s, 0) +| some (Syntax.node k _) => + if isLitKind k || k == fieldIdxKind then + (s, appPrec) + else + (s, 0) | _ => (s, 0) def indexed {α : Type} (map : TokenMap α) (c : ParserContext) (s : ParserState) (leadingIdentAsSymbol : Bool) : ParserState × List α := @@ -1486,6 +1524,9 @@ mkAntiquot "strLit" strLitKind <|> strLitNoAntiquot def charLit {k : ParserKind} : Parser k := mkAntiquot "charLit" charLitKind <|> charLitNoAntiquot +def nameLit {k : ParserKind} : Parser k := +mkAntiquot "nameLit" nameLitKind <|> nameLitNoAntiquot + def categoryParserOfStackFn (offset : Nat) : ParserFn leading := fun rbp ctx s => let stack := s.stxStack; @@ -1659,9 +1700,10 @@ def compileParserDescr (categories : ParserCategories) : forall {k : ParserKind} | _, ParserDescr.sepBy1 d₁ d₂ => sepBy1 <$> compileParserDescr d₁ <*> compileParserDescr d₂ | _, ParserDescr.node k d => node k <$> compileParserDescr d | _, ParserDescr.symbol tk lbp => pure $ symbolAux tk lbp -| _, ParserDescr.num => pure $ numLit -| _, ParserDescr.str => pure $ strLit -| _, ParserDescr.char => pure $ charLit +| _, ParserDescr.numLit => pure $ numLit +| _, ParserDescr.strLit => pure $ strLit +| _, ParserDescr.charLit => pure $ charLit +| _, ParserDescr.nameLit => pure $ nameLit | _, ParserDescr.ident => pure $ ident | ParserKind.leading, ParserDescr.nonReservedSymbol tk includeIdent => pure $ nonReservedSymbol tk includeIdent @@ -1783,7 +1825,7 @@ parserExtension.addEntry env $ ParserExtensionEntry.kind k def isValidSyntaxNodeKind (env : Environment) (k : SyntaxNodeKind) : Bool := let kinds := (parserExtension.getState env).kinds; -kinds.contains k || k == choiceKind || k == strLitKind || k == numLitKind || k == charLitKind +kinds.contains k || k == choiceKind || isLitKind k def getSyntaxNodeKinds (env : Environment) : List SyntaxNodeKind := do let kinds := (parserExtension.getState env).kinds; diff --git a/src/Init/Lean/Parser/Term.lean b/src/Init/Lean/Parser/Term.lean index d7f306f987..f494e95e9d 100644 --- a/src/Init/Lean/Parser/Term.lean +++ b/src/Init/Lean/Parser/Term.lean @@ -85,7 +85,7 @@ def matchAlt := parser! " | " >> sepBy1 termParser ", " >> darrow >> termParser @[builtinTermParser] def «parser!» := parser! "parser! " >> termParser @[builtinTermParser] def «tparser!» := parser! "tparser! " >> termParser @[builtinTermParser] def borrowed := parser! symbol "@&" appPrec >> termParser (appPrec - 1) -@[builtinTermParser] def quotedName := parser! symbol "`" appPrec >> rawIdent +@[builtinTermParser] def quotedName := parser! nameLit -- NOTE: syntax quotations are defined in Init.Lean.Parser.Command @[builtinTermParser] def antiquot := (mkAntiquot "term" none true : Parser) @[builtinTermParser] def «match_syntax» := parser! "match_syntax" >> termParser >> " with " >> many1Indent matchAlt "'match_syntax' alternatives must be indented" diff --git a/src/Init/LeanInit.lean b/src/Init/LeanInit.lean index 0c8c95ed72..103a942e82 100644 --- a/src/Init/LeanInit.lean +++ b/src/Init/LeanInit.lean @@ -21,6 +21,36 @@ without importing the whole `Lean` module. It also allow us to use extensions to develop the `Init` library. -/ +/- Valid identifier names -/ +def isGreek (c : Char) : Bool := +0x391 ≤ c.val && c.val ≤ 0x3dd + +def isLetterLike (c : Char) : Bool := +(0x3b1 ≤ c.val && c.val ≤ 0x3c9 && c.val ≠ 0x3bb) || -- Lower greek, but lambda +(0x391 ≤ c.val && c.val ≤ 0x3A9 && c.val ≠ 0x3A0 && c.val ≠ 0x3A3) || -- Upper greek, but Pi and Sigma +(0x3ca ≤ c.val && c.val ≤ 0x3fb) || -- Coptic letters +(0x1f00 ≤ c.val && c.val ≤ 0x1ffe) || -- Polytonic Greek Extended Character Set +(0x2100 ≤ c.val && c.val ≤ 0x214f) || -- Letter like block +(0x1d49c ≤ c.val && c.val ≤ 0x1d59f) -- Latin letters, Script, Double-struck, Fractur + +def isSubScriptAlnum (c : Char) : Bool := +(0x2080 ≤ c.val && c.val ≤ 0x2089) || -- numeric subscripts +(0x2090 ≤ c.val && c.val ≤ 0x209c) || +(0x1d62 ≤ c.val && c.val ≤ 0x1d6a) + +def isIdFirst (c : Char) : Bool := +c.isAlpha || c = '_' || isLetterLike c + +def isIdRest (c : Char) : Bool := +c.isAlphanum || c = '_' || c = '\'' || c == '!' || c == '?' || isLetterLike c || isSubScriptAlnum c + +def idBeginEscape := '«' +def idEndEscape := '»' +def isIdBeginEscape (c : Char) : Bool := +c = idBeginEscape +def isIdEndEscape (c : Char) : Bool := +c = idEndEscape + /- Hierarchical names -/ inductive Name | anonymous : Name @@ -121,9 +151,10 @@ inductive ParserDescrCore : ParserKind → Type | node {k : ParserKind} : Name → ParserDescrCore k → ParserDescrCore k | symbol {k : ParserKind} : String → Option Nat → ParserDescrCore k | nonReservedSymbol : String → Bool → ParserDescrCore ParserKind.leading -| num {k : ParserKind} : ParserDescrCore k -| str {k : ParserKind} : ParserDescrCore k -| char {k : ParserKind} : ParserDescrCore k +| numLit {k : ParserKind} : ParserDescrCore k +| strLit {k : ParserKind} : ParserDescrCore k +| charLit {k : ParserKind} : ParserDescrCore k +| nameLit {k : ParserKind} : ParserDescrCore k | ident {k : ParserKind} : ParserDescrCore k | pushLeading : ParserDescrCore ParserKind.trailing | parser {k : ParserKind} : Name → Nat → ParserDescrCore k @@ -144,9 +175,10 @@ abbrev TrailingParserDescr := ParserDescrCore ParserKind.trailing @[matchPattern] abbrev ParserDescr.sepBy1 := @ParserDescrCore.sepBy1 @[matchPattern] abbrev ParserDescr.node := @ParserDescrCore.node @[matchPattern] abbrev ParserDescr.symbol := @ParserDescrCore.symbol -@[matchPattern] abbrev ParserDescr.num := @ParserDescrCore.num -@[matchPattern] abbrev ParserDescr.str := @ParserDescrCore.str -@[matchPattern] abbrev ParserDescr.char := @ParserDescrCore.char +@[matchPattern] abbrev ParserDescr.numLit := @ParserDescrCore.numLit +@[matchPattern] abbrev ParserDescr.strLit := @ParserDescrCore.strLit +@[matchPattern] abbrev ParserDescr.charLit := @ParserDescrCore.charLit +@[matchPattern] abbrev ParserDescr.nameLit := @ParserDescrCore.nameLit @[matchPattern] abbrev ParserDescr.ident := @ParserDescrCore.ident @[matchPattern] abbrev ParserDescr.nonReservedSymbol := @ParserDescrCore.nonReservedSymbol @[matchPattern] abbrev ParserDescr.pushLeading := @ParserDescrCore.pushLeading @@ -373,6 +405,7 @@ abbrev Macro := Syntax → MacroM Syntax def strLitKind : SyntaxNodeKind := `strLit def charLitKind : SyntaxNodeKind := `charLit def numLitKind : SyntaxNodeKind := `numLit +def nameLitKind : SyntaxNodeKind := `nameLit def fieldIdxKind : SyntaxNodeKind := `fieldIdx /- Helper functions for processing Syntax programmatically -/ @@ -524,16 +557,22 @@ else else if c.isDigit then decodeDecimalLitAux s 0 0 else none -def isNatLitAux (nodeKind : SyntaxNodeKind) : Syntax → Option Nat -| Syntax.node k args => - if k == nodeKind && args.size == 1 then +def isLit? (litKind : SyntaxNodeKind) (stx : Syntax) : Option String := +match stx with +| Syntax.node k args => + if k == litKind && args.size == 1 then match args.get! 0 with - | (Syntax.atom _ val) => decodeNatLitVal val + | (Syntax.atom _ val) => some val | _ => none else none | _ => none +def isNatLitAux (litKind : SyntaxNodeKind) (stx : Syntax) : Option Nat := +match isLit? litKind stx with +| some val => decodeNatLitVal val +| _ => none + def isNatLit? (s : Syntax) : Option Nat := isNatLitAux numLitKind s @@ -586,15 +625,10 @@ partial def decodeStrLitAux (s : String) : String.Pos → String → Option Stri def decodeStrLit (s : String) : Option String := decodeStrLitAux s 1 "" -def isStrLit? : Syntax → Option String -| Syntax.node k args => - if k == strLitKind && args.size == 1 then - match args.get! 0 with - | (Syntax.atom _ val) => decodeStrLit val - | _ => none - else - none -| _ => none +def isStrLit? (stx : Syntax) : Option String := +match isLit? strLitKind stx with +| some val => decodeStrLit val +| _ => none def decodeCharLit (s : String) : Option Char := let c := s.get 1; @@ -604,15 +638,43 @@ if c == '\\' then do else pure c -def isCharLit? : Syntax → Option Char -| Syntax.node k args => - if k == charLitKind && args.size == 1 then - match args.get! 0 with - | (Syntax.atom _ val) => decodeCharLit val - | _ => none +def isCharLit? (stx : Syntax) : Option Char := +match isLit? charLitKind stx with +| some val => decodeCharLit val +| _ => none + +private partial def decodeNameLitAux (s : String) : Nat → Name → Option Name +| i, r => + let continue? (i : Nat) (r : Name) : Option Name := + if s.get i == '.' then + decodeNameLitAux (s.next i) r + else if s.atEnd i then + pure r + else + none; + let curr := s.get i; + if isIdBeginEscape curr then + let startPart := s.next i; + let stopPart := s.nextUntil isIdEndEscape startPart; + if !isIdEndEscape (s.get stopPart) then none + else continue? (s.next stopPart) (mkNameStr r (s.extract startPart stopPart)) + else if isIdFirst curr then + let startPart := i; + let stopPart := s.nextWhile isIdRest startPart; + continue? stopPart (mkNameStr r (s.extract startPart stopPart)) else none -| _ => none + +def decodeNameLit (s : String) : Option Name := +if s.get 0 == '`' then + decodeNameLitAux s 1 Name.anonymous +else + none + +def isNameLit? (stx : Syntax) : Option Name := +match isLit? nameLitKind stx with +| some val => decodeNameLit val +| _ => none def hasArgs : Syntax → Bool | Syntax.node _ args => args.size > 0 diff --git a/tests/lean/namelit.lean b/tests/lean/namelit.lean new file mode 100644 index 0000000000..1068ff04fb --- /dev/null +++ b/tests/lean/namelit.lean @@ -0,0 +1,14 @@ +new_frontend + +#check `foo +#check `foo.bla +#check `«foo bla» +#check `«foo bla».«hello world» +#check `«foo bla».boo.«hello world» +#check `foo.«hello» + +macro dummy1 : term => `(`hello) +macro dummy2 : term => `(`hello.«world !!!») + +#check dummy1 +#check dummy2 diff --git a/tests/lean/namelit.lean.expected.out b/tests/lean/namelit.lean.expected.out new file mode 100644 index 0000000000..b2ee2fe64a --- /dev/null +++ b/tests/lean/namelit.lean.expected.out @@ -0,0 +1,8 @@ +Lean.mkNameStr Lean.Name.anonymous "foo" : Lean.Name +Lean.mkNameStr (Lean.mkNameStr Lean.Name.anonymous "foo") "bla" : Lean.Name +Lean.mkNameStr Lean.Name.anonymous "foo bla" : Lean.Name +Lean.mkNameStr (Lean.mkNameStr Lean.Name.anonymous "foo bla") "hello world" : Lean.Name +Lean.mkNameStr (Lean.mkNameStr (Lean.mkNameStr Lean.Name.anonymous "foo bla") "boo") "hello world" : Lean.Name +Lean.mkNameStr (Lean.mkNameStr Lean.Name.anonymous "foo") "hello" : Lean.Name +Lean.mkNameStr Lean.Name.anonymous "hello" : Lean.Name +Lean.mkNameStr (Lean.mkNameStr Lean.Name.anonymous "hello") "world !!!" : Lean.Name