From a495a4ce8628fc762e4a767e81070dd9c99d2e60 Mon Sep 17 00:00:00 2001 From: Leonardo de Moura Date: Mon, 17 Jun 2019 13:25:01 -0700 Subject: [PATCH] feat(library/init/lean/parser/parser): new parser combinators based on `tests/playground/parser` Main differences with respect to `tests/playground/parser` 1- There is a single (parametric) parser type: `Parser k`, where `k` is used to identify whether it is a `nud` or `led` parser. 2- It assumes parsing tables are stored in the `Environment`. 3- We check precedence mismatch, and use the value `none` to represent "use existing precedence". 4- We have support for silent (aka epsilon) parsing actions. Remark: the experiments at `tests/playground/parser` demonstrated that the new parsing infrastructure is at least 10x faster than the one based on the `Parsec` monad. --- library/init/lean/parser/parser.lean | 730 +++++++++++++++++++++++++++ 1 file changed, 730 insertions(+) create mode 100644 library/init/lean/parser/parser.lean diff --git a/library/init/lean/parser/parser.lean b/library/init/lean/parser/parser.lean new file mode 100644 index 0000000000..7e15099528 --- /dev/null +++ b/library/init/lean/parser/parser.lean @@ -0,0 +1,730 @@ +/- +Copyright (c) 2019 Microsoft Corporation. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Leonardo de Moura, Sebastian Ullrich +-/ +prelude +import init.lean.position +import init.lean.syntax +import init.lean.environment +import init.lean.parser.trie +import init.lean.parser.identifier + +namespace Lean +namespace Parser + +structure TokenConfig := +(val : String) +(lbp : Option Nat := none) + +namespace TokenConfig + +def beq : TokenConfig → TokenConfig → Bool +| ⟨val₁, lbp₁⟩ ⟨val₂, lbp₂⟩ := val₁ == val₂ && lbp₁ == lbp₂ + +instance : HasBeq TokenConfig := +⟨beq⟩ + +end TokenConfig + +structure TokenCacheEntry := +(startPos stopPos : String.Pos) +(token : Syntax) + +structure ParserCache := +(tokenCache : Option TokenCacheEntry := none) + +structure ParserContext := +(env : Environment) +(input : String) +(filename : String) +(fileMap : FileMap) +(tokens : Trie TokenConfig) + +structure ParserState := +(stxStack : Array Syntax) +(pos : String.Pos) +(cache : ParserCache) +(errorMsg : Option String) + +namespace ParserState + +@[inline] def hasError (s : ParserState) : Bool := +s.errorMsg != none + +@[inline] def stackSize (s : ParserState) : Nat := +s.stxStack.size + +def restore (s : ParserState) (iniStackSz : Nat) (iniPos : Nat) : ParserState := +{ stxStack := s.stxStack.shrink iniStackSz, errorMsg := none, pos := iniPos, .. s} + +def setPos (s : ParserState) (pos : Nat) : ParserState := +{ pos := pos, .. s } + +def setCache (s : ParserState) (cache : ParserCache) : ParserState := +{ cache := cache, .. s } + +def pushSyntax (s : ParserState) (n : Syntax) : ParserState := +{ stxStack := s.stxStack.push n, .. s } + +def shrinkStack (s : ParserState) (iniStackSz : Nat) : ParserState := +{ stxStack := s.stxStack.shrink iniStackSz, .. s } + +def next (s : ParserState) (input : String) (pos : Nat) : ParserState := +{ pos := input.next pos, .. s } + +def toErrorMsg (ctx : ParserContext) (s : ParserState) : String := +match s.errorMsg with +| none := "" +| some msg := + let pos := ctx.fileMap.toPosition s.pos in + ctx.filename ++ ":" ++ toString pos.line ++ ":" ++ toString pos.column ++ " " ++ msg + +def mkNode (s : ParserState) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserState := +match s with +| ⟨stack, pos, cache, err⟩ := + if err != none && stack.size == iniStackSz then + -- If there is an error but there are no new nodes on the stack, we just return `d` + s + else + let newNode := Syntax.node k (stack.extract iniStackSz stack.size) [] in + let stack := stack.shrink iniStackSz in + let stack := stack.push newNode in + ⟨stack, pos, cache, err⟩ + +def mkError (s : ParserState) (msg : String) : ParserState := +match s with +| ⟨stack, pos, cache, _⟩ := ⟨stack, pos, cache, some msg⟩ + +def mkEOIError (s : ParserState) : ParserState := +s.mkError "end of input" + +def mkErrorAt (s : ParserState) (msg : String) (pos : String.Pos) : ParserState := +match s with +| ⟨stack, _, cache, _⟩ := ⟨stack, pos, cache, some msg⟩ + +end ParserState + +inductive ParserKind +| nud | led + +export ParserKind (nud led) + +def ParserArg : ParserKind → Type +| ParserKind.nud := Nat +| ParserKind.led := Syntax + +def BasicParserFn := ParserContext → ParserState → ParserState + +def ParserFn (k : ParserKind) := ParserArg k → BasicParserFn + +instance ParserFn.inhabited (k : ParserKind) : Inhabited (ParserFn k) := ⟨λ _ _, id⟩ + +inductive FirstTokens +| epsilon : FirstTokens +| unknown : FirstTokens +| tokens : List TokenConfig → FirstTokens + +namespace FirstTokens + +def merge : FirstTokens → FirstTokens → FirstTokens +| epsilon tks := tks +| tks epsilon := tks +| (tokens s₁) (tokens s₂) := tokens (s₁ ++ s₂) +| _ _ := unknown + +def seq : FirstTokens → FirstTokens → FirstTokens +| epsilon tks := tks +| tks _ := tks + +end FirstTokens + +structure ParserInfo := +(updateTokens : Trie TokenConfig → ExceptT String Id (Trie TokenConfig) := λ tks, pure tks) +(firstTokens : FirstTokens := FirstTokens.unknown) + +structure Parser (k : ParserKind := nud) := +(info : ParserInfo := {}) +(fn : ParserFn k) + +abbrev TrailingParser := Parser led + +@[noinline] def epsilonInfo : ParserInfo := +{ firstTokens := FirstTokens.epsilon } + +@[inline] def pushLeadingFn : ParserFn led := +λ a c s, s.pushSyntax a + +@[inline] def pushLeading : TrailingParser := +{ info := epsilonInfo, + fn := pushLeadingFn } + +@[inline] def checkLeadingFn (p : Syntax → Bool) : ParserFn led := +λ a c s, + if p a then s + else s.mkError "invalid leading token" + +@[inline] def checkLeading (p : Syntax → Bool) : TrailingParser := +{ info := epsilonInfo, + fn := checkLeadingFn p } + +@[inline] def andthenAux (p q : BasicParserFn) : BasicParserFn := +λ c s, + let s := p c s in + if s.hasError then s else q c s + +@[inline] def andthenFn {k : ParserKind} (p q : ParserFn k) : ParserFn k := +λ a c s, andthenAux (p a) (q a) c s + +@[noinline] def andthenInfo (p q : ParserInfo) : ParserInfo := +{ updateTokens := λ tks, q.updateTokens tks >>= p.updateTokens, + firstTokens := p.firstTokens.seq q.firstTokens } + +@[inline] def andthen {k : ParserKind} (p q : Parser k) : Parser k := +{ info := andthenInfo p.info q.info, + fn := andthenFn p.fn q.fn } + +@[inline] def nodeFn {k : ParserKind} (n : SyntaxNodeKind) (p : ParserFn k) : ParserFn k +| a c s := + let iniSz := s.stackSize in + let s := p a c s in + s.mkNode n iniSz + +@[noinline] def nodeInfo (p : ParserInfo) : ParserInfo := +{ updateTokens := p.updateTokens, + firstTokens := p.firstTokens } + +@[inline] def node {k : ParserKind} (n : SyntaxNodeKind) (p : Parser k) : Parser k := +{ info := nodeInfo p.info, + fn := nodeFn n p.fn } + +@[inline] def orelseFn {k : ParserKind} (p q : ParserFn k) : ParserFn k +| a c s := + let iniSz := s.stackSize in + let iniPos := s.pos in + let s := p a c s in + if s.hasError && s.pos == iniPos then q a c (s.restore iniSz iniPos) else s + +@[noinline] def orelseInfo (p q : ParserInfo) : ParserInfo := +{ updateTokens := λ tks, q.updateTokens tks >>= p.updateTokens, + firstTokens := p.firstTokens.merge q.firstTokens } + +@[inline] def orelse {k : ParserKind} (p q : Parser k) : Parser k := +{ info := orelseInfo p.info q.info, + fn := orelseFn p.fn q.fn } + +@[noinline] def noFirstTokenInfo (info : ParserInfo) : ParserInfo := +{ updateTokens := info.updateTokens } + +@[inline] def tryFn {k : ParserKind} (p : ParserFn k ) : ParserFn k +| a c s := + let iniSz := s.stackSize in + let iniPos := s.pos in + match p a c s with + | ⟨stack, _, cache, some msg⟩ := ⟨stack.shrink iniSz, iniPos, cache, some msg⟩ + | other := other + +@[inline] def try {k : ParserKind} (p : Parser k) : Parser k := +{ info := noFirstTokenInfo p.info, + fn := tryFn p.fn } + +@[inline] def optionalFn {k : ParserKind} (p : ParserFn k) : ParserFn k := +λ a c s, + let iniSz := s.stackSize in + let iniPos := s.pos in + let s := p a c s in + let s := if s.hasError then s.restore iniSz iniPos else s in + s.mkNode nullKind iniSz + +@[inline] def optional {k : ParserKind} (p : Parser k) : Parser k := +{ info := noFirstTokenInfo p.info, + fn := optionalFn p.fn } + +@[specialize] partial def manyAux {k : ParserKind} (p : ParserFn k) : ParserFn k +| a c s := + let iniSz := s.stackSize in + let iniPos := s.pos in + let s := p a c s in + if s.hasError then s.restore iniSz iniPos + else if iniPos == s.pos then s.mkError "invalid 'many' parser combinator application, parser did not consume anything" + else manyAux a c s + +@[inline] def manyFn {k : ParserKind} (p : ParserFn k) : ParserFn k := +λ a c s, + let iniSz := s.stackSize in + let s := manyAux p a c s in + s.mkNode nullKind iniSz + +@[inline] def many {k : ParserKind} (p : Parser k) : Parser k := +{ info := noFirstTokenInfo p.info, + fn := manyFn p.fn } + +@[inline] def many1 {k : ParserKind} (p : Parser k) : Parser k := +andthen p (many p) + +@[specialize] private partial def sepByFnAux {k : ParserKind} (p : ParserFn k) (sep : ParserFn k) (allowTrailingSep : Bool) (iniSz : Nat) : Bool → ParserFn k +| pOpt a c s := + let sz := s.stackSize in + let pos := s.pos in + let s := p a c s in + if s.hasError then + let s := s.restore sz pos in + if pOpt then + s.mkNode nullKind iniSz + else + -- append `Syntax.missing` to make clear that List is incomplete + let s := s.pushSyntax Syntax.missing in + s.mkNode nullKind iniSz + else + let sz := s.stackSize in + let pos := s.pos in + let s := sep a c s in + if s.hasError then + let s := s.restore sz pos in + s.mkNode nullKind iniSz + else + sepByFnAux allowTrailingSep a c s + +@[specialize] def sepByFn {k : ParserKind} (allowTrailingSep : Bool) (p : ParserFn k) (sep : ParserFn k) : ParserFn k +| a c s := + let iniSz := s.stackSize in + sepByFnAux p sep allowTrailingSep iniSz true a c s + +@[specialize] def sepBy1Fn {k : ParserKind} (allowTrailingSep : Bool) (p : ParserFn k) (sep : ParserFn k) : ParserFn k +| a c s := + let iniSz := s.stackSize in + sepByFnAux p sep allowTrailingSep iniSz false a c s + +@[noinline] def sepByInfo (p sep : ParserInfo) : ParserInfo := +{ updateTokens := λ tks, p.updateTokens tks >>= sep.updateTokens } + +@[noinline] def sepBy1Info (p sep : ParserInfo) : ParserInfo := +{ updateTokens := λ tks, p.updateTokens tks >>= sep.updateTokens, + firstTokens := p.firstTokens } + +@[inline] def sepBy {k : ParserKind} (p sep : Parser k) (allowTrailingSep : Bool := false) : Parser k := +{ info := sepByInfo p.info sep.info, + fn := sepByFn allowTrailingSep p.fn sep.fn } + +@[inline] def sepBy1 {k : ParserKind} (p sep : Parser k) (allowTrailingSep : Bool := false) : Parser k := +{ info := sepBy1Info p.info sep.info, + fn := sepBy1Fn allowTrailingSep p.fn sep.fn } + +@[specialize] partial def satisfyFn (p : Char → Bool) (errorMsg : String := "unexpected character") : BasicParserFn +| c s := + let i := s.pos in + if c.input.atEnd i then s.mkEOIError + else if p (c.input.get i) then s.next c.input i + else s.mkError errorMsg + +@[specialize] partial def takeUntilFn (p : Char → Bool) : BasicParserFn +| c s := + let i := s.pos in + if c.input.atEnd i then s + else if p (c.input.get i) then s + else takeUntilFn c (s.next c.input i) + +@[specialize] def takeWhileFn (p : Char → Bool) : BasicParserFn := +takeUntilFn (λ c, !p c) + +@[inline] def takeWhile1Fn (p : Char → Bool) (errorMsg : String) : BasicParserFn := +andthenAux (satisfyFn p errorMsg) (takeWhileFn p) + +partial def finishCommentBlock : Nat → BasicParserFn +| nesting c s := + let input := c.input in + let i := s.pos in + if input.atEnd i then s.mkEOIError + else + let curr := input.get i in + let i := input.next i in + if curr == '-' then + if input.atEnd i then s.mkEOIError + else + let curr := input.get i in + if curr == '/' then -- "-/" end of comment + if nesting == 1 then s.next input i + else finishCommentBlock (nesting-1) c (s.next input i) + else + finishCommentBlock nesting c (s.next input i) + else if curr == '/' then + if input.atEnd i then s.mkEOIError + else + let curr := input.get i in + if curr == '-' then finishCommentBlock (nesting+1) c (s.next input i) + else finishCommentBlock nesting c (s.setPos i) + else finishCommentBlock nesting c (s.setPos i) + +/- Consume whitespace and comments -/ +partial def whitespace : BasicParserFn +| c s := + let input := c.input in + let i := s.pos in + if input.atEnd i then s + else + let curr := input.get i in + if curr.isWhitespace then whitespace c (s.next input i) + else if curr == '-' then + let i := input.next i in + let curr := input.get i in + if curr == '-' then andthenAux (takeUntilFn (= '\n')) whitespace c (s.next input i) + else s + else if curr == '/' then + let i := input.next i in + let curr := input.get i in + if curr == '-' then + let i := input.next i in + let curr := input.get i in + if curr == '-' then s -- "/--" doc comment is an actual token + else andthenAux (finishCommentBlock 1) whitespace c (s.next input i) + else s + else s + +def mkEmptySubstringAt (s : String) (p : Nat) : Substring := +{str := s, startPos := p, stopPos := p } + +private def rawAux {k : ParserKind} (startPos : Nat) (trailingWs : Bool) : ParserFn k +| a c s := + let input := c.input in + let stopPos := s.pos in + let leading := mkEmptySubstringAt input startPos in + let val := input.extract startPos stopPos in + if trailingWs then + let s := whitespace c s in + let stopPos' := s.pos in + let trailing := { Substring . str := input, startPos := stopPos, stopPos := stopPos' } in + let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in + s.pushSyntax atom + else + let trailing := mkEmptySubstringAt input stopPos in + let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in + s.pushSyntax atom + +/-- Match an arbitrary Parser and return the consumed String in a `Syntax.atom`. -/ +@[inline] def rawFn {k : ParserKind} (p : ParserFn k) (trailingWs := false) : ParserFn k +| a c s := + let startPos := s.pos in + let s := p a c s in + if s.hasError then s else rawAux startPos trailingWs a c s + +def hexDigitFn : BasicParserFn +| c s := + let input := c.input in + let i := s.pos in + if input.atEnd i then s.mkEOIError + else + let curr := input.get i in + let i := input.next i in + if curr.isDigit || ('a' <= curr && curr <= 'f') || ('A' <= curr && curr <= 'F') then s.setPos i + else s.mkError "invalid hexadecimal numeral, hexadecimal digit expected" + +def quotedCharFn : BasicParserFn +| c s := + let input := c.input in + let i := s.pos in + if input.atEnd i then s.mkEOIError + else + let curr := input.get i in + if curr == '\\' || curr == '\"' || curr == '\'' || curr == '\n' || curr == '\t' then + s.next input i + else if curr == 'x' then + andthenAux hexDigitFn hexDigitFn c (s.next input i) + else if curr == 'u' then + andthenAux hexDigitFn (andthenAux hexDigitFn (andthenAux hexDigitFn hexDigitFn)) c (s.next input i) + else + s.mkError "invalid escape sequence" + +def mkStrLitKind : IO SyntaxNodeKind := nextKind `strLit +@[init mkStrLitKind] constant strLitKind : SyntaxNodeKind := default _ + +def mkNumberKind : IO SyntaxNodeKind := nextKind `number +@[init mkNumberKind] constant numberKind : SyntaxNodeKind := default _ + +/-- Push `(Syntax.node tk )` into syntax stack -/ +def mkNodeToken (n : SyntaxNodeKind) (startPos : Nat) : BasicParserFn := +λ c s, +let input := c.input in +let stopPos := s.pos in +let leading := mkEmptySubstringAt input startPos in +let val := input.extract startPos stopPos in +let s := whitespace c s in +let wsStopPos := s.pos in +let trailing := { Substring . str := input, startPos := stopPos, stopPos := wsStopPos } in +let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in +let tk := Syntax.node n (Array.singleton atom) [] in +s.pushSyntax tk + +partial def strLitFnAux (startPos : Nat) : BasicParserFn +| c s := + let input := c.input in + let i := s.pos in + if input.atEnd i then s.mkEOIError + else + let curr := input.get i in + let s := s.setPos (input.next i) in + if curr == '\"' then + mkNodeToken strLitKind startPos c s + else if curr == '\\' then andthenAux quotedCharFn strLitFnAux c s + else strLitFnAux c s + +def decimalNumberFn (startPos : Nat) : BasicParserFn := +λ c s, + let s := takeWhileFn (λ c, c.isDigit) c s in + let input := c.input in + let i := s.pos in + let curr := input.get i in + let s := + if curr == '.' then + let i := input.next i in + let curr := input.get i in + if curr.isDigit then + takeWhileFn (λ c, c.isDigit) c (s.setPos i) + else + s + else + s in + mkNodeToken numberKind startPos c s + +def binNumberFn (startPos : Nat) : BasicParserFn := +λ c s, + let s := takeWhile1Fn (λ c, c == '0' || c == '1') "expected binary number" c s in + mkNodeToken numberKind startPos c s + +def octalNumberFn (startPos : Nat) : BasicParserFn := +λ c s, + let s := takeWhile1Fn (λ c, '0' ≤ c && c ≤ '7') "expected octal number" c s in + mkNodeToken numberKind startPos c s + +def hexNumberFn (startPos : Nat) : BasicParserFn := +λ c s, + let s := takeWhile1Fn (λ c, ('0' ≤ c && c ≤ '9') || ('a' ≤ c && c ≤ 'f') || ('A' ≤ c && c ≤ 'F')) "expected hexadecimal number" c s in + mkNodeToken numberKind startPos c s + +def numberFnAux : BasicParserFn := +λ c s, + let input := c.input in + let startPos := s.pos in + if input.atEnd startPos then s.mkEOIError + else + let curr := input.get startPos in + if curr == '0' then + let i := input.next startPos in + let curr := input.get i in + if curr == 'b' || curr == 'B' then + binNumberFn startPos c (s.next input i) + else if curr == 'o' || curr == 'O' then + octalNumberFn startPos c (s.next input i) + else if curr == 'x' || curr == 'X' then + hexNumberFn startPos c (s.next input i) + else + decimalNumberFn startPos c (s.setPos i) + else if curr.isDigit then + decimalNumberFn startPos c (s.next input startPos) + else + s.mkError "expected numeral" + +def isIdCont : String → ParserState → Bool +| input s := + let i := s.pos in + let curr := input.get i in + if curr == '.' then + let i := input.next i in + if input.atEnd i then + false + else + let curr := input.get i in + isIdFirst curr || isIdBeginEscape curr + else + false + +private def isToken (idStartPos idStopPos : Nat) (tk : Option TokenConfig) : Bool := +match tk with +| none := false +| some tk := + -- if a token is both a symbol and a valid identifier (i.e. a keyword), + -- we want it to be recognized as a symbol + tk.val.bsize ≥ idStopPos - idStopPos + +def mkTokenAndFixPos (startPos : Nat) (tk : Option TokenConfig) : BasicParserFn := +λ c s, +match tk with +| none := s.mkErrorAt "token expected" startPos +| some tk := + let input := c.input in + let leading := mkEmptySubstringAt input startPos in + let val := tk.val in + let stopPos := startPos + val.bsize in + let s := s.setPos stopPos in + let s := whitespace c s in + let wsStopPos := s.pos in + let trailing := { Substring . str := input, startPos := stopPos, stopPos := wsStopPos } in + let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in + s.pushSyntax atom + +def mkIdResult (startPos : Nat) (tk : Option TokenConfig) (val : Name) : BasicParserFn := +λ c s, +let stopPos := s.pos in +if isToken startPos stopPos tk then + mkTokenAndFixPos startPos tk c s +else + let input := c.input in + let rawVal := { Substring . str := input, startPos := startPos, stopPos := stopPos } in + let s := whitespace c s in + let trailingStopPos := s.pos in + let leading := mkEmptySubstringAt input startPos in + let trailing := { Substring . str := input, startPos := stopPos, stopPos := trailingStopPos } in + let info := { SourceInfo . leading := leading, trailing := trailing, pos := startPos } in + let atom := Syntax.ident (some info) rawVal val [] [] in + s.pushSyntax atom + +partial def identFnAux (startPos : Nat) (tk : Option TokenConfig) : Name → BasicParserFn +| r c s := + let input := c.input in + let i := s.pos in + if input.atEnd i then s.mkEOIError + else + let curr := input.get i in + if isIdBeginEscape curr then + let startPart := input.next i in + let s := takeUntilFn isIdEndEscape c (s.setPos startPart) in + let stopPart := s.pos in + let s := satisfyFn isIdEndEscape "end of escaped identifier expected" c s in + if s.hasError then s + else + let r := Name.mkString r (input.extract startPart stopPart) in + if isIdCont input s then + identFnAux r c s + else + mkIdResult startPos tk r c s + else if isIdFirst curr then + let startPart := i in + let s := takeWhileFn isIdRest c (s.next input i) in + let stopPart := s.pos in + let r := Name.mkString r (input.extract startPart stopPart) in + if isIdCont input s then + identFnAux r c s + else + mkIdResult startPart tk r c s + else + mkTokenAndFixPos startPos tk c s + +private def tokenFnAux : BasicParserFn +| c s := + let input := c.input in + let i := s.pos in + let curr := input.get i in + if curr == '\"' then + strLitFnAux i c (s.next input i) + else if curr.isDigit then + numberFnAux c s + else + let (_, tk) := c.tokens.matchPrefix input i in + identFnAux i tk Name.anonymous c s + +private def updateCache (startPos : Nat) (s : ParserState) : ParserState := +match s with +| ⟨stack, pos, cache, none⟩ := + if stack.size == 0 then s + else + let tk := stack.back in + ⟨stack, pos, { tokenCache := some { startPos := startPos, stopPos := pos, token := tk } }, none⟩ +| other := other + +def tokenFn : BasicParserFn := +λ c s, + let input := c.input in + let i := s.pos in + if input.atEnd i then s.mkEOIError + else + match s.cache with + | { tokenCache := some tkc } := + if tkc.startPos == i then + let s := s.pushSyntax tkc.token in + s.setPos tkc.stopPos + else + let s := tokenFnAux c s in + updateCache i s + | _ := + let s := tokenFnAux c s in + updateCache i s + +@[inline] def satisfySymbolFn (p : String → Bool) (errorMsg : String) : BasicParserFn := +λ c s, + let startPos := s.pos in + let s := tokenFn c s in + if s.hasError then + s.mkErrorAt errorMsg startPos + else + match s.stxStack.back with + | Syntax.atom _ sym := if p sym then s else s.mkErrorAt errorMsg startPos + | _ := s.mkErrorAt errorMsg startPos + +def symbolFnAux (sym : String) (errorMsg : String) : BasicParserFn := +satisfySymbolFn (== sym) errorMsg + +def insertToken (sym : String) (lbp : Option Nat) (tks : Trie TokenConfig) : ExceptT String Id (Trie TokenConfig) := +match tks.find sym, lbp with +| none, _ := pure (tks.insert sym { val := sym, lbp := lbp }) +| some _, none := pure tks +| some tk, some newLbp := + match tk.lbp with + | none := pure (tks.insert sym { val := sym, lbp := lbp }) + | some oldLbp := if newLbp == oldLbp then pure tks else throw ("precedence mismatch for '" ++ toString sym ++ "', previous: " ++ toString oldLbp ++ ", new: " ++ toString newLbp) + +def symbolInfo (sym : String) (lbp : Option Nat) : ParserInfo := +{ updateTokens := insertToken sym lbp, + firstTokens := FirstTokens.tokens [ { val := sym, lbp := lbp } ] } + +@[inline] def symbolFn {k : ParserKind} (sym : String) : ParserFn k := +λ _, symbolFnAux sym ("expected '" ++ sym ++ "'") + +@[inline] def symbol {k : ParserKind} (sym : String) (lbp : Option Nat := none) : Parser k := +{ info := symbolInfo sym lbp, + fn := symbolFn sym } + +def unicodeSymbolFnAux (sym asciiSym : String) (errorMsg : String) : BasicParserFn := +satisfySymbolFn (λ s, s == sym || s == asciiSym) errorMsg + +def unicodeSymbolInfo (sym asciiSym : String) (lbp : Option Nat) : ParserInfo := +{ updateTokens := λ tks, insertToken sym lbp tks >>= insertToken asciiSym lbp, + firstTokens := FirstTokens.tokens [ { val := sym, lbp := lbp }, { val := asciiSym, lbp := lbp } ] } + +@[inline] def unicodeSymbolFn {k : ParserKind} (sym asciiSym : String) : ParserFn k := +λ _, unicodeSymbolFnAux sym asciiSym ("expected '" ++ sym ++ "' or '" ++ asciiSym ++ "'") + +@[inline] def unicodeSymbol {k : ParserKind} (sym asciiSym : String) (lbp : Option Nat := none) : Parser k := +{ info := unicodeSymbolInfo sym asciiSym lbp, + fn := unicodeSymbolFn sym asciiSym } + +def numberFn {k : ParserKind} : ParserFn k := +λ _ c s, + let s := tokenFn c s in + if s.hasError || !(s.stxStack.back.isOfKind numberKind) then s.mkError "expected numeral" else s + +@[inline] def number {k : ParserKind} : Parser k := +{ fn := numberFn } + +def strLitFn {k : ParserKind} : ParserFn k := +λ _ c s, +let s := tokenFn c s in +if s.hasError || !(s.stxStack.back.isOfKind strLitKind) then s.mkError "expected string literal" else s + +@[inline] def strLit {k : ParserKind} : Parser k := +{ fn := strLitFn } + +def identFn {k : ParserKind} : ParserFn k := +λ _ c s, +let s := tokenFn c s in +if s.hasError || !(s.stxStack.back.isIdent) then + s.mkError "expected identifier" +else + s + +@[inline] def ident {k : ParserKind} : Parser k := +{ fn := identFn } + +instance string2basic {k : ParserKind} : HasCoe String (Parser k) := +⟨symbol⟩ + +end Parser +end Lean