lean4-htt/library/init/lean/parser/parser.lean
Leonardo de Moura a495a4ce86 feat(library/init/lean/parser/parser): new parser combinators based on tests/playground/parser
Main differences with respect to `tests/playground/parser`

1- There is a single (parametric) parser type: `Parser k`, where `k` is
   used to identify whether it is a `nud` or `led` parser.
2- It assumes parsing tables are stored in the `Environment`.
3- We check precedence mismatch, and use the value `none` to represent
   "use existing precedence".
4- We have support for silent (aka epsilon) parsing actions.

Remark: the experiments at `tests/playground/parser` demonstrated that the new
parsing infrastructure is at least 10x faster than the one based on the
`Parsec` monad.
2019-06-17 13:29:01 -07:00

730 lines
24 KiB
Text

/-
Copyright (c) 2019 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Leonardo de Moura, Sebastian Ullrich
-/
prelude
import init.lean.position
import init.lean.syntax
import init.lean.environment
import init.lean.parser.trie
import init.lean.parser.identifier
namespace Lean
namespace Parser
structure TokenConfig :=
(val : String)
(lbp : Option Nat := none)
namespace TokenConfig
def beq : TokenConfig → TokenConfig → Bool
| ⟨val₁, lbp₁⟩ ⟨val₂, lbp₂⟩ := val₁ == val₂ && lbp₁ == lbp₂
instance : HasBeq TokenConfig :=
⟨beq⟩
end TokenConfig
structure TokenCacheEntry :=
(startPos stopPos : String.Pos)
(token : Syntax)
structure ParserCache :=
(tokenCache : Option TokenCacheEntry := none)
structure ParserContext :=
(env : Environment)
(input : String)
(filename : String)
(fileMap : FileMap)
(tokens : Trie TokenConfig)
structure ParserState :=
(stxStack : Array Syntax)
(pos : String.Pos)
(cache : ParserCache)
(errorMsg : Option String)
namespace ParserState
@[inline] def hasError (s : ParserState) : Bool :=
s.errorMsg != none
@[inline] def stackSize (s : ParserState) : Nat :=
s.stxStack.size
def restore (s : ParserState) (iniStackSz : Nat) (iniPos : Nat) : ParserState :=
{ stxStack := s.stxStack.shrink iniStackSz, errorMsg := none, pos := iniPos, .. s}
def setPos (s : ParserState) (pos : Nat) : ParserState :=
{ pos := pos, .. s }
def setCache (s : ParserState) (cache : ParserCache) : ParserState :=
{ cache := cache, .. s }
def pushSyntax (s : ParserState) (n : Syntax) : ParserState :=
{ stxStack := s.stxStack.push n, .. s }
def shrinkStack (s : ParserState) (iniStackSz : Nat) : ParserState :=
{ stxStack := s.stxStack.shrink iniStackSz, .. s }
def next (s : ParserState) (input : String) (pos : Nat) : ParserState :=
{ pos := input.next pos, .. s }
def toErrorMsg (ctx : ParserContext) (s : ParserState) : String :=
match s.errorMsg with
| none := ""
| some msg :=
let pos := ctx.fileMap.toPosition s.pos in
ctx.filename ++ ":" ++ toString pos.line ++ ":" ++ toString pos.column ++ " " ++ msg
def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState :=
match s with
| ⟨stack, pos, cache, err⟩ :=
if err != none && stack.size == iniStackSz then
-- If there is an error but there are no new nodes on the stack, we just return `d`
s
else
let newNode := Syntax.node k (stack.extract iniStackSz stack.size) [] in
let stack := stack.shrink iniStackSz in
let stack := stack.push newNode in
⟨stack, pos, cache, err⟩
def mkError (s : ParserState) (msg : String) : ParserState :=
match s with
| ⟨stack, pos, cache, _⟩ := ⟨stack, pos, cache, some msg⟩
def mkEOIError (s : ParserState) : ParserState :=
s.mkError "end of input"
def mkErrorAt (s : ParserState) (msg : String) (pos : String.Pos) : ParserState :=
match s with
| ⟨stack, _, cache, _⟩ := ⟨stack, pos, cache, some msg⟩
end ParserState
inductive ParserKind
| nud | led
export ParserKind (nud led)
def ParserArg : ParserKind → Type
| ParserKind.nud := Nat
| ParserKind.led := Syntax
def BasicParserFn := ParserContext → ParserState → ParserState
def ParserFn (k : ParserKind) := ParserArg k → BasicParserFn
instance ParserFn.inhabited (k : ParserKind) : Inhabited (ParserFn k) := ⟨λ _ _, id⟩
inductive FirstTokens
| epsilon : FirstTokens
| unknown : FirstTokens
| tokens : List TokenConfig → FirstTokens
namespace FirstTokens
def merge : FirstTokens → FirstTokens → FirstTokens
| epsilon tks := tks
| tks epsilon := tks
| (tokens s₁) (tokens s₂) := tokens (s₁ ++ s₂)
| _ _ := unknown
def seq : FirstTokens → FirstTokens → FirstTokens
| epsilon tks := tks
| tks _ := tks
end FirstTokens
structure ParserInfo :=
(updateTokens : Trie TokenConfig → ExceptT String Id (Trie TokenConfig) := λ tks, pure tks)
(firstTokens : FirstTokens := FirstTokens.unknown)
structure Parser (k : ParserKind := nud) :=
(info : ParserInfo := {})
(fn : ParserFn k)
abbrev TrailingParser := Parser led
@[noinline] def epsilonInfo : ParserInfo :=
{ firstTokens := FirstTokens.epsilon }
@[inline] def pushLeadingFn : ParserFn led :=
λ a c s, s.pushSyntax a
@[inline] def pushLeading : TrailingParser :=
{ info := epsilonInfo,
fn := pushLeadingFn }
@[inline] def checkLeadingFn (p : Syntax → Bool) : ParserFn led :=
λ a c s,
if p a then s
else s.mkError "invalid leading token"
@[inline] def checkLeading (p : Syntax → Bool) : TrailingParser :=
{ info := epsilonInfo,
fn := checkLeadingFn p }
@[inline] def andthenAux (p q : BasicParserFn) : BasicParserFn :=
λ c s,
let s := p c s in
if s.hasError then s else q c s
@[inline] def andthenFn {k : ParserKind} (p q : ParserFn k) : ParserFn k :=
λ a c s, andthenAux (p a) (q a) c s
@[noinline] def andthenInfo (p q : ParserInfo) : ParserInfo :=
{ updateTokens := λ tks, q.updateTokens tks >>= p.updateTokens,
firstTokens := p.firstTokens.seq q.firstTokens }
@[inline] def andthen {k : ParserKind} (p q : Parser k) : Parser k :=
{ info := andthenInfo p.info q.info,
fn := andthenFn p.fn q.fn }
@[inline] def nodeFn {k : ParserKind} (n : SyntaxNodeKind) (p : ParserFn k) : ParserFn k
| a c s :=
let iniSz := s.stackSize in
let s := p a c s in
s.mkNode n iniSz
@[noinline] def nodeInfo (p : ParserInfo) : ParserInfo :=
{ updateTokens := p.updateTokens,
firstTokens := p.firstTokens }
@[inline] def node {k : ParserKind} (n : SyntaxNodeKind) (p : Parser k) : Parser k :=
{ info := nodeInfo p.info,
fn := nodeFn n p.fn }
@[inline] def orelseFn {k : ParserKind} (p q : ParserFn k) : ParserFn k
| a c s :=
let iniSz := s.stackSize in
let iniPos := s.pos in
let s := p a c s in
if s.hasError && s.pos == iniPos then q a c (s.restore iniSz iniPos) else s
@[noinline] def orelseInfo (p q : ParserInfo) : ParserInfo :=
{ updateTokens := λ tks, q.updateTokens tks >>= p.updateTokens,
firstTokens := p.firstTokens.merge q.firstTokens }
@[inline] def orelse {k : ParserKind} (p q : Parser k) : Parser k :=
{ info := orelseInfo p.info q.info,
fn := orelseFn p.fn q.fn }
@[noinline] def noFirstTokenInfo (info : ParserInfo) : ParserInfo :=
{ updateTokens := info.updateTokens }
@[inline] def tryFn {k : ParserKind} (p : ParserFn k ) : ParserFn k
| a c s :=
let iniSz := s.stackSize in
let iniPos := s.pos in
match p a c s with
| ⟨stack, _, cache, some msg⟩ := ⟨stack.shrink iniSz, iniPos, cache, some msg⟩
| other := other
@[inline] def try {k : ParserKind} (p : Parser k) : Parser k :=
{ info := noFirstTokenInfo p.info,
fn := tryFn p.fn }
@[inline] def optionalFn {k : ParserKind} (p : ParserFn k) : ParserFn k :=
λ a c s,
let iniSz := s.stackSize in
let iniPos := s.pos in
let s := p a c s in
let s := if s.hasError then s.restore iniSz iniPos else s in
s.mkNode nullKind iniSz
@[inline] def optional {k : ParserKind} (p : Parser k) : Parser k :=
{ info := noFirstTokenInfo p.info,
fn := optionalFn p.fn }
@[specialize] partial def manyAux {k : ParserKind} (p : ParserFn k) : ParserFn k
| a c s :=
let iniSz := s.stackSize in
let iniPos := s.pos in
let s := p a c s in
if s.hasError then s.restore iniSz iniPos
else if iniPos == s.pos then s.mkError "invalid 'many' parser combinator application, parser did not consume anything"
else manyAux a c s
@[inline] def manyFn {k : ParserKind} (p : ParserFn k) : ParserFn k :=
λ a c s,
let iniSz := s.stackSize in
let s := manyAux p a c s in
s.mkNode nullKind iniSz
@[inline] def many {k : ParserKind} (p : Parser k) : Parser k :=
{ info := noFirstTokenInfo p.info,
fn := manyFn p.fn }
@[inline] def many1 {k : ParserKind} (p : Parser k) : Parser k :=
andthen p (many p)
@[specialize] private partial def sepByFnAux {k : ParserKind} (p : ParserFn k) (sep : ParserFn k) (allowTrailingSep : Bool) (iniSz : Nat) : Bool → ParserFn k
| pOpt a c s :=
let sz := s.stackSize in
let pos := s.pos in
let s := p a c s in
if s.hasError then
let s := s.restore sz pos in
if pOpt then
s.mkNode nullKind iniSz
else
-- append `Syntax.missing` to make clear that List is incomplete
let s := s.pushSyntax Syntax.missing in
s.mkNode nullKind iniSz
else
let sz := s.stackSize in
let pos := s.pos in
let s := sep a c s in
if s.hasError then
let s := s.restore sz pos in
s.mkNode nullKind iniSz
else
sepByFnAux allowTrailingSep a c s
@[specialize] def sepByFn {k : ParserKind} (allowTrailingSep : Bool) (p : ParserFn k) (sep : ParserFn k) : ParserFn k
| a c s :=
let iniSz := s.stackSize in
sepByFnAux p sep allowTrailingSep iniSz true a c s
@[specialize] def sepBy1Fn {k : ParserKind} (allowTrailingSep : Bool) (p : ParserFn k) (sep : ParserFn k) : ParserFn k
| a c s :=
let iniSz := s.stackSize in
sepByFnAux p sep allowTrailingSep iniSz false a c s
@[noinline] def sepByInfo (p sep : ParserInfo) : ParserInfo :=
{ updateTokens := λ tks, p.updateTokens tks >>= sep.updateTokens }
@[noinline] def sepBy1Info (p sep : ParserInfo) : ParserInfo :=
{ updateTokens := λ tks, p.updateTokens tks >>= sep.updateTokens,
firstTokens := p.firstTokens }
@[inline] def sepBy {k : ParserKind} (p sep : Parser k) (allowTrailingSep : Bool := false) : Parser k :=
{ info := sepByInfo p.info sep.info,
fn := sepByFn allowTrailingSep p.fn sep.fn }
@[inline] def sepBy1 {k : ParserKind} (p sep : Parser k) (allowTrailingSep : Bool := false) : Parser k :=
{ info := sepBy1Info p.info sep.info,
fn := sepBy1Fn allowTrailingSep p.fn sep.fn }
@[specialize] partial def satisfyFn (p : Char → Bool) (errorMsg : String := "unexpected character") : BasicParserFn
| c s :=
let i := s.pos in
if c.input.atEnd i then s.mkEOIError
else if p (c.input.get i) then s.next c.input i
else s.mkError errorMsg
@[specialize] partial def takeUntilFn (p : Char → Bool) : BasicParserFn
| c s :=
let i := s.pos in
if c.input.atEnd i then s
else if p (c.input.get i) then s
else takeUntilFn c (s.next c.input i)
@[specialize] def takeWhileFn (p : Char → Bool) : BasicParserFn :=
takeUntilFn (λ c, !p c)
@[inline] def takeWhile1Fn (p : Char → Bool) (errorMsg : String) : BasicParserFn :=
andthenAux (satisfyFn p errorMsg) (takeWhileFn p)
partial def finishCommentBlock : Nat → BasicParserFn
| nesting c s :=
let input := c.input in
let i := s.pos in
if input.atEnd i then s.mkEOIError
else
let curr := input.get i in
let i := input.next i in
if curr == '-' then
if input.atEnd i then s.mkEOIError
else
let curr := input.get i in
if curr == '/' then -- "-/" end of comment
if nesting == 1 then s.next input i
else finishCommentBlock (nesting-1) c (s.next input i)
else
finishCommentBlock nesting c (s.next input i)
else if curr == '/' then
if input.atEnd i then s.mkEOIError
else
let curr := input.get i in
if curr == '-' then finishCommentBlock (nesting+1) c (s.next input i)
else finishCommentBlock nesting c (s.setPos i)
else finishCommentBlock nesting c (s.setPos i)
/- Consume whitespace and comments -/
partial def whitespace : BasicParserFn
| c s :=
let input := c.input in
let i := s.pos in
if input.atEnd i then s
else
let curr := input.get i in
if curr.isWhitespace then whitespace c (s.next input i)
else if curr == '-' then
let i := input.next i in
let curr := input.get i in
if curr == '-' then andthenAux (takeUntilFn (= '\n')) whitespace c (s.next input i)
else s
else if curr == '/' then
let i := input.next i in
let curr := input.get i in
if curr == '-' then
let i := input.next i in
let curr := input.get i in
if curr == '-' then s -- "/--" doc comment is an actual token
else andthenAux (finishCommentBlock 1) whitespace c (s.next input i)
else s
else s
def mkEmptySubstringAt (s : String) (p : Nat) : Substring :=
{str := s, startPos := p, stopPos := p }
private def rawAux {k : ParserKind} (startPos : Nat) (trailingWs : Bool) : ParserFn k
| a c s :=
let input := c.input in
let stopPos := s.pos in
let leading := mkEmptySubstringAt input startPos in
let val := input.extract startPos stopPos in
if trailingWs then
let s := whitespace c s in
let stopPos' := s.pos in
let trailing := { Substring . str := input, startPos := stopPos, stopPos := stopPos' } in
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
s.pushSyntax atom
else
let trailing := mkEmptySubstringAt input stopPos in
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
s.pushSyntax atom
/-- Match an arbitrary Parser and return the consumed String in a `Syntax.atom`. -/
@[inline] def rawFn {k : ParserKind} (p : ParserFn k) (trailingWs := false) : ParserFn k
| a c s :=
let startPos := s.pos in
let s := p a c s in
if s.hasError then s else rawAux startPos trailingWs a c s
def hexDigitFn : BasicParserFn
| c s :=
let input := c.input in
let i := s.pos in
if input.atEnd i then s.mkEOIError
else
let curr := input.get i in
let i := input.next i in
if curr.isDigit || ('a' <= curr && curr <= 'f') || ('A' <= curr && curr <= 'F') then s.setPos i
else s.mkError "invalid hexadecimal numeral, hexadecimal digit expected"
def quotedCharFn : BasicParserFn
| c s :=
let input := c.input in
let i := s.pos in
if input.atEnd i then s.mkEOIError
else
let curr := input.get i in
if curr == '\\' || curr == '\"' || curr == '\'' || curr == '\n' || curr == '\t' then
s.next input i
else if curr == 'x' then
andthenAux hexDigitFn hexDigitFn c (s.next input i)
else if curr == 'u' then
andthenAux hexDigitFn (andthenAux hexDigitFn (andthenAux hexDigitFn hexDigitFn)) c (s.next input i)
else
s.mkError "invalid escape sequence"
def mkStrLitKind : IO SyntaxNodeKind := nextKind `strLit
@[init mkStrLitKind] constant strLitKind : SyntaxNodeKind := default _
def mkNumberKind : IO SyntaxNodeKind := nextKind `number
@[init mkNumberKind] constant numberKind : SyntaxNodeKind := default _
/-- Push `(Syntax.node tk <new-atom>)` into syntax stack -/
def mkNodeToken (n : SyntaxNodeKind) (startPos : Nat) : BasicParserFn :=
λ c s,
let input := c.input in
let stopPos := s.pos in
let leading := mkEmptySubstringAt input startPos in
let val := input.extract startPos stopPos in
let s := whitespace c s in
let wsStopPos := s.pos in
let trailing := { Substring . str := input, startPos := stopPos, stopPos := wsStopPos } in
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
let tk := Syntax.node n (Array.singleton atom) [] in
s.pushSyntax tk
partial def strLitFnAux (startPos : Nat) : BasicParserFn
| c s :=
let input := c.input in
let i := s.pos in
if input.atEnd i then s.mkEOIError
else
let curr := input.get i in
let s := s.setPos (input.next i) in
if curr == '\"' then
mkNodeToken strLitKind startPos c s
else if curr == '\\' then andthenAux quotedCharFn strLitFnAux c s
else strLitFnAux c s
def decimalNumberFn (startPos : Nat) : BasicParserFn :=
λ c s,
let s := takeWhileFn (λ c, c.isDigit) c s in
let input := c.input in
let i := s.pos in
let curr := input.get i in
let s :=
if curr == '.' then
let i := input.next i in
let curr := input.get i in
if curr.isDigit then
takeWhileFn (λ c, c.isDigit) c (s.setPos i)
else
s
else
s in
mkNodeToken numberKind startPos c s
def binNumberFn (startPos : Nat) : BasicParserFn :=
λ c s,
let s := takeWhile1Fn (λ c, c == '0' || c == '1') "expected binary number" c s in
mkNodeToken numberKind startPos c s
def octalNumberFn (startPos : Nat) : BasicParserFn :=
λ c s,
let s := takeWhile1Fn (λ c, '0' ≤ c && c ≤ '7') "expected octal number" c s in
mkNodeToken numberKind startPos c s
def hexNumberFn (startPos : Nat) : BasicParserFn :=
λ c s,
let s := takeWhile1Fn (λ c, ('0' ≤ c && c ≤ '9') || ('a' ≤ c && c ≤ 'f') || ('A' ≤ c && c ≤ 'F')) "expected hexadecimal number" c s in
mkNodeToken numberKind startPos c s
def numberFnAux : BasicParserFn :=
λ c s,
let input := c.input in
let startPos := s.pos in
if input.atEnd startPos then s.mkEOIError
else
let curr := input.get startPos in
if curr == '0' then
let i := input.next startPos in
let curr := input.get i in
if curr == 'b' || curr == 'B' then
binNumberFn startPos c (s.next input i)
else if curr == 'o' || curr == 'O' then
octalNumberFn startPos c (s.next input i)
else if curr == 'x' || curr == 'X' then
hexNumberFn startPos c (s.next input i)
else
decimalNumberFn startPos c (s.setPos i)
else if curr.isDigit then
decimalNumberFn startPos c (s.next input startPos)
else
s.mkError "expected numeral"
def isIdCont : String → ParserState → Bool
| input s :=
let i := s.pos in
let curr := input.get i in
if curr == '.' then
let i := input.next i in
if input.atEnd i then
false
else
let curr := input.get i in
isIdFirst curr || isIdBeginEscape curr
else
false
private def isToken (idStartPos idStopPos : Nat) (tk : Option TokenConfig) : Bool :=
match tk with
| none := false
| some tk :=
-- if a token is both a symbol and a valid identifier (i.e. a keyword),
-- we want it to be recognized as a symbol
tk.val.bsize ≥ idStopPos - idStopPos
def mkTokenAndFixPos (startPos : Nat) (tk : Option TokenConfig) : BasicParserFn :=
λ c s,
match tk with
| none := s.mkErrorAt "token expected" startPos
| some tk :=
let input := c.input in
let leading := mkEmptySubstringAt input startPos in
let val := tk.val in
let stopPos := startPos + val.bsize in
let s := s.setPos stopPos in
let s := whitespace c s in
let wsStopPos := s.pos in
let trailing := { Substring . str := input, startPos := stopPos, stopPos := wsStopPos } in
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
s.pushSyntax atom
def mkIdResult (startPos : Nat) (tk : Option TokenConfig) (val : Name) : BasicParserFn :=
λ c s,
let stopPos := s.pos in
if isToken startPos stopPos tk then
mkTokenAndFixPos startPos tk c s
else
let input := c.input in
let rawVal := { Substring . str := input, startPos := startPos, stopPos := stopPos } in
let s := whitespace c s in
let trailingStopPos := s.pos in
let leading := mkEmptySubstringAt input startPos in
let trailing := { Substring . str := input, startPos := stopPos, stopPos := trailingStopPos } in
let info := { SourceInfo . leading := leading, trailing := trailing, pos := startPos } in
let atom := Syntax.ident (some info) rawVal val [] [] in
s.pushSyntax atom
partial def identFnAux (startPos : Nat) (tk : Option TokenConfig) : Name → BasicParserFn
| r c s :=
let input := c.input in
let i := s.pos in
if input.atEnd i then s.mkEOIError
else
let curr := input.get i in
if isIdBeginEscape curr then
let startPart := input.next i in
let s := takeUntilFn isIdEndEscape c (s.setPos startPart) in
let stopPart := s.pos in
let s := satisfyFn isIdEndEscape "end of escaped identifier expected" c s in
if s.hasError then s
else
let r := Name.mkString r (input.extract startPart stopPart) in
if isIdCont input s then
identFnAux r c s
else
mkIdResult startPos tk r c s
else if isIdFirst curr then
let startPart := i in
let s := takeWhileFn isIdRest c (s.next input i) in
let stopPart := s.pos in
let r := Name.mkString r (input.extract startPart stopPart) in
if isIdCont input s then
identFnAux r c s
else
mkIdResult startPart tk r c s
else
mkTokenAndFixPos startPos tk c s
private def tokenFnAux : BasicParserFn
| c s :=
let input := c.input in
let i := s.pos in
let curr := input.get i in
if curr == '\"' then
strLitFnAux i c (s.next input i)
else if curr.isDigit then
numberFnAux c s
else
let (_, tk) := c.tokens.matchPrefix input i in
identFnAux i tk Name.anonymous c s
private def updateCache (startPos : Nat) (s : ParserState) : ParserState :=
match s with
| ⟨stack, pos, cache, none⟩ :=
if stack.size == 0 then s
else
let tk := stack.back in
⟨stack, pos, { tokenCache := some { startPos := startPos, stopPos := pos, token := tk } }, none⟩
| other := other
def tokenFn : BasicParserFn :=
λ c s,
let input := c.input in
let i := s.pos in
if input.atEnd i then s.mkEOIError
else
match s.cache with
| { tokenCache := some tkc } :=
if tkc.startPos == i then
let s := s.pushSyntax tkc.token in
s.setPos tkc.stopPos
else
let s := tokenFnAux c s in
updateCache i s
| _ :=
let s := tokenFnAux c s in
updateCache i s
@[inline] def satisfySymbolFn (p : String → Bool) (errorMsg : String) : BasicParserFn :=
λ c s,
let startPos := s.pos in
let s := tokenFn c s in
if s.hasError then
s.mkErrorAt errorMsg startPos
else
match s.stxStack.back with
| Syntax.atom _ sym := if p sym then s else s.mkErrorAt errorMsg startPos
| _ := s.mkErrorAt errorMsg startPos
def symbolFnAux (sym : String) (errorMsg : String) : BasicParserFn :=
satisfySymbolFn (== sym) errorMsg
def insertToken (sym : String) (lbp : Option Nat) (tks : Trie TokenConfig) : ExceptT String Id (Trie TokenConfig) :=
match tks.find sym, lbp with
| none, _ := pure (tks.insert sym { val := sym, lbp := lbp })
| some _, none := pure tks
| some tk, some newLbp :=
match tk.lbp with
| none := pure (tks.insert sym { val := sym, lbp := lbp })
| some oldLbp := if newLbp == oldLbp then pure tks else throw ("precedence mismatch for '" ++ toString sym ++ "', previous: " ++ toString oldLbp ++ ", new: " ++ toString newLbp)
def symbolInfo (sym : String) (lbp : Option Nat) : ParserInfo :=
{ updateTokens := insertToken sym lbp,
firstTokens := FirstTokens.tokens [ { val := sym, lbp := lbp } ] }
@[inline] def symbolFn {k : ParserKind} (sym : String) : ParserFn k :=
λ _, symbolFnAux sym ("expected '" ++ sym ++ "'")
@[inline] def symbol {k : ParserKind} (sym : String) (lbp : Option Nat := none) : Parser k :=
{ info := symbolInfo sym lbp,
fn := symbolFn sym }
def unicodeSymbolFnAux (sym asciiSym : String) (errorMsg : String) : BasicParserFn :=
satisfySymbolFn (λ s, s == sym || s == asciiSym) errorMsg
def unicodeSymbolInfo (sym asciiSym : String) (lbp : Option Nat) : ParserInfo :=
{ updateTokens := λ tks, insertToken sym lbp tks >>= insertToken asciiSym lbp,
firstTokens := FirstTokens.tokens [ { val := sym, lbp := lbp }, { val := asciiSym, lbp := lbp } ] }
@[inline] def unicodeSymbolFn {k : ParserKind} (sym asciiSym : String) : ParserFn k :=
λ _, unicodeSymbolFnAux sym asciiSym ("expected '" ++ sym ++ "' or '" ++ asciiSym ++ "'")
@[inline] def unicodeSymbol {k : ParserKind} (sym asciiSym : String) (lbp : Option Nat := none) : Parser k :=
{ info := unicodeSymbolInfo sym asciiSym lbp,
fn := unicodeSymbolFn sym asciiSym }
def numberFn {k : ParserKind} : ParserFn k :=
λ _ c s,
let s := tokenFn c s in
if s.hasError || !(s.stxStack.back.isOfKind numberKind) then s.mkError "expected numeral" else s
@[inline] def number {k : ParserKind} : Parser k :=
{ fn := numberFn }
def strLitFn {k : ParserKind} : ParserFn k :=
λ _ c s,
let s := tokenFn c s in
if s.hasError || !(s.stxStack.back.isOfKind strLitKind) then s.mkError "expected string literal" else s
@[inline] def strLit {k : ParserKind} : Parser k :=
{ fn := strLitFn }
def identFn {k : ParserKind} : ParserFn k :=
λ _ c s,
let s := tokenFn c s in
if s.hasError || !(s.stxStack.back.isIdent) then
s.mkError "expected identifier"
else
s
@[inline] def ident {k : ParserKind} : Parser k :=
{ fn := identFn }
instance string2basic {k : ParserKind} : HasCoe String (Parser k) :=
⟨symbol⟩
end Parser
end Lean