test(tests/playground/parser): replace parser.lean with parser2.lean

This commit is contained in:
Leonardo de Moura 2019-04-12 07:50:50 -07:00
parent 87690217c6
commit e53cb81255
5 changed files with 645 additions and 1039 deletions

View file

@ -1,7 +1,7 @@
LEANC=../../../bin/leanc
LEAN=../../../bin/lean
COMMON_SRCS = parser.lean syntax.lean filemap.lean
SRCS = $(COMMON_SRCS) test1.lean test2.lean
SRCS = $(COMMON_SRCS) test1.lean
OLEANS = $(SRCS:.lean=.olean)
CPPS = $(SRCS:.lean=.cpp)
COMMON_OBJS = $(COMMON_SRCS:.lean=.o)
@ -31,8 +31,8 @@ depends: $(DEPS)
test1: test1.o $(COMMON_OBJS)
$(LEANC) -o $@ $^
test2: test2.o $(COMMON_OBJS)
$(LEANC) -o $@ $^
# test2: test2.o $(COMMON_OBJS)
# $(LEANC) -o $@ $^
clean:
rm -f *.olean

View file

@ -1,337 +1,671 @@
namespace Lean
import init.lean.name init.lean.parser.trie init.lean.parser.identifier
import syntax filemap
open Lean
export Lean.Parser (Trie)
-- namespace Lean
namespace Parser
abbrev Pos := String.Pos
structure FrontendConfig :=
(filename : String)
(input : String)
(fileMap : FileMap)
/-
σ is the non-backtrackable State
δ is the backtrackable State
μ is the extra error Message data
-/
inductive Result (σ δ μ α : Type)
| ok {} (a : α) (i : Pos) (st : σ) (bst : δ) : Result
| error {} (msg : String) (i : Pos) (st : σ) (extra : Option μ) : Result
structure TokenConfig :=
(val : String)
(lbp : Nat := 0)
inductive Result.IsOk {σ δ μ α : Type} : Result σ δ μ α → Prop
| mk (a : α) (i : Pos) (st : σ) (bst : δ) : Result.IsOk (Result.ok a i st bst)
namespace TokenConfig
theorem errorIsNotOk {σ δ μ α : Type} {msg : String} {i : Pos} {st : σ} {extra : Option μ}
(h : Result.IsOk (@Result.error σ δ μ α msg i st extra)) : False :=
match h with end
def beq : TokenConfig → TokenConfig → Bool
| ⟨val₁, lbp₁⟩ ⟨val₂, lbp₂⟩ := val₁ == val₂ && lbp₁ == lbp₂
@[inline] def unreachableError {σ δ μ α β : Type} {msg : String} {i : Pos} {st : σ} {extra : Option μ}
(h : Result.IsOk (@Result.error σ δ μ α msg i st extra)) : β :=
False.elim (errorIsNotOk h)
instance : HasBeq TokenConfig :=
⟨beq⟩
def input (σ δ μ : Type) : Type := { r : Result σ δ μ Unit // r.IsOk }
end TokenConfig
@[inline] def mkInput {σ δ μ : Type} (i : Pos) (st : σ) (bst : δ) : input σ δ μ :=
⟨Result.ok () i st bst, Result.IsOk.mk _ _ _ _⟩
structure TokenCacheEntry :=
(startPos stopPos : String.Pos)
(token : Syntax)
def ParserM (σ δ μ α : Type) :=
String → input σ δ μ → Result σ δ μ α
structure ParserCache :=
(tokenCache : Option TokenCacheEntry := none)
variables {σ δ μ α β : Type}
structure ParserConfig extends FrontendConfig :=
(tokens : Trie TokenConfig)
namespace ParserM
structure ParserData :=
(stxStack : Array Syntax) (pos : String.Pos) (cache : ParserCache) (errorMsg : Option String)
protected def default : ParserM σ δ μ α :=
λ _ inp,
match inp with
| ⟨Result.ok _ i st bst, h⟩ := Result.error "" i st none
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
@[inline] def ParserData.hasError (d : ParserData) : Bool :=
d.errorMsg != none
instance : Inhabited (ParserM σ δ μ α) :=
⟨ParserM.default⟩
@[inline] def ParserData.stackSize (d : ParserData) : Nat :=
d.stxStack.size
@[inline] def run (p : ParserM σ δ μ α) (st : σ) (bst : δ) (s : String) : Result σ δ μ α :=
p s (mkInput 0 st bst)
def ParserData.restore (d : ParserData) (iniStackSz : Nat) (iniPos : Nat) : ParserData :=
match d with
| ⟨stack, _, cache, _⟩ := ⟨stack.shrink iniStackSz, iniPos, cache, none⟩
@[inline] def pure (a : α) : ParserM σ δ μ α :=
λ _ inp,
match inp with
| ⟨Result.ok _ it st bst, h⟩ := Result.ok a it st bst
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
def ParserData.setPos (d : ParserData) (pos : Nat) : ParserData :=
match d with
| ⟨stack, _, cache, msg⟩ := ⟨stack, pos, cache, msg⟩
@[inline] def bind (x : ParserM σ δ μ α) (f : α → ParserM σ δ μ β) : ParserM σ δ μ β :=
λ str inp,
match x str inp with
| Result.ok a i st bst := f a str (mkInput i st bst)
| Result.error msg i st etx := Result.error msg i st etx
def ParserData.setCache (d : ParserData) (cache : ParserCache) : ParserData :=
match d with
| ⟨stack, pos, _, msg⟩ := ⟨stack, pos, cache, msg⟩
instance isMonad : Monad (ParserM σ δ μ) :=
{pure := @ParserM.pure _ _ _, bind := @ParserM.bind _ _ _}
def ParserData.pushSyntax (d : ParserData) (n : Syntax) : ParserData :=
match d with
| ⟨stack, pos, cache, msg⟩ := ⟨stack.push n, pos, cache, msg⟩
def mkError (r : input σ δ μ) (msg : String) : Result σ δ μ α :=
match r with
| ⟨Result.ok _ i st _, _⟩ := Result.error msg i st none
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
def ParserData.next (d : ParserData) (s : String) (pos : Nat) : ParserData :=
d.setPos (s.next pos)
@[inline] def orelse (p q : ParserM σ δ μ α) : ParserM σ δ μ α :=
λ str inp,
match inp with
| ⟨Result.ok _ i₁ _ bst₁, _⟩ :=
(match p str inp with
| err@(Result.error _ i₂ st₂ _) := if i₁ == i₂ then q str (mkInput i₁ st₂ bst₁) else err
| other := other)
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
def ParserData.toErrorMsg (d : ParserData) (cfg : ParserConfig) : String :=
match d.errorMsg with
| none := ""
| some msg :=
let pos := cfg.fileMap.toPosition d.pos in
cfg.filename ++ ":" ++ toString pos.line ++ ":" ++ toString pos.column ++ " " ++ msg
@[inline] def failure : ParserM σ δ μ α :=
λ _ inp, mkError inp "failure"
def ParserFn := String → ParserData → ParserData
instance : Alternative (ParserM σ δ μ) :=
{ orelse := @ParserM.orelse _ _ _,
failure := @ParserM.failure _ _ _,
.. ParserM.isMonad }
instance : Inhabited ParserFn :=
⟨λ s, id⟩
@[inline] def currPos : input σ δ μ → Pos
| ⟨Result.ok _ i _ _, _⟩ := i
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
structure ParserInfo :=
(updateTokens : Trie TokenConfig → Trie TokenConfig := λ tks, tks)
(firstToken : Option TokenConfig := none)
@[inline] def try {α : Type} (p : ParserM σ δ μ α) : ParserM σ δ μ α :=
λ str inp,
match inp with
| ⟨Result.ok _ i _ _, _⟩ := (match p str inp with
| Result.error msg _ st x := Result.error msg i st x
| other := other)
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
@[inline] def andthenFn (p q : ParserFn) : ParserFn
| s d :=
let d := p s d in
if d.hasError then d else q s d
@[inline] def lookahead (p : ParserM σ δ μ α) : ParserM σ δ μ α :=
λ str inp,
match inp with
| ⟨Result.ok _ i _ bst, _⟩ :=
(match p str inp with
| Result.ok a _ st _ := Result.ok a i st bst
| other := other)
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
@[noinline] def andthenInfo (p q : ParserInfo) : ParserInfo :=
{ updateTokens := q.updateTokens ∘ p.updateTokens,
firstToken := p.firstToken }
@[specialize] def satisfy (p : Char → Bool) (errorMsg := "unexpected character") : ParserM σ δ μ Char :=
λ str inp,
match inp with
| ⟨Result.ok _ i st bst, _⟩ :=
if str.atEnd i then mkError inp "end of input"
else let c := str.get i in
if p c then Result.ok c (str.next i) st bst
else mkError inp errorMsg
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
def ParserData.mkNode (d : ParserData) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserData :=
match d with
| ⟨stack, pos, cache, err⟩ :=
if err != none && stack.size == iniStackSz then
-- If there is an error but there are no new nodes on the stack, we just return `d`
d
else
let newNode := Syntax.node k (stack.extract iniStackSz stack.size) [] in
let stack := stack.shrink iniStackSz in
let stack := stack.push newNode in
⟨stack, pos, cache, err⟩
@[inline] def ch (c : Char) : ParserM σ δ μ Char :=
satisfy (== c) ("expected " ++ repr c)
@[inline] def nodeFn (k : SyntaxNodeKind) (p : ParserFn) : ParserFn
| s d :=
let iniSz := d.stackSize in
let d := p s d in
d.mkNode k iniSz
@[inline] def any : ParserM σ δ μ Char :=
satisfy (λ _, true)
@[noinline] def nodeInfo (p : ParserInfo) : ParserInfo :=
{ updateTokens := p.updateTokens,
firstToken := p.firstToken }
@[specialize] partial def takeUntilAux (p : Char → Bool) : ParserM σ δ μ Unit
| str inp :=
match inp with
| ⟨Result.ok _ i st bst, _⟩ :=
if str.atEnd i then inp.val
else let c := str.get i in
if p c then inp.val
else takeUntilAux str (mkInput (str.next i) st bst)
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
@[inline] def orelseFn (p q : ParserFn) : ParserFn
| s d :=
let iniSz := d.stackSize in
let iniPos := d.pos in
let d := p s d in
if d.hasError && d.pos == iniPos then q s (d.restore iniSz iniPos) else d
@[inline] def takeUntil (p : Char → Bool) : ParserM σ δ μ Unit :=
λ str inp, takeUntilAux p str inp
@[noinline] def orelseInfo (p q : ParserInfo) : ParserInfo :=
{ updateTokens := q.updateTokens ∘ p.updateTokens,
firstToken :=
match p.firstToken, q.firstToken with
| some tk₁, some tk₂ := if tk₁ == tk₂ then some tk₁ else none
| _, _ := none }
partial def strAux (s : String) (errorMsg : String) (initPos : Pos) : Pos → ParserM σ δ μ Unit
| j str inp :=
if s.atEnd j then inp.val
else match inp with
| ⟨Result.ok _ i st bst, _⟩ :=
if str.atEnd i then Result.error errorMsg initPos st none
else if s.get j == str.get i then strAux (s.next j) str (mkInput (str.next i) st bst)
else Result.error errorMsg initPos st none
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
def ParserData.resetPos : ParserData → String.Pos → ParserData
| ⟨stack, _, cache, errorMsg⟩ pos := ⟨stack, pos, cache, errorMsg⟩
@[inline] def str (s : String) : ParserM σ δ μ Unit :=
λ str inp,
let initPos := currPos inp in
strAux s ("expected " ++ repr s) initPos 0 str inp
@[inline] def tryFn (p : ParserFn) : ParserFn
| s d :=
let iniPos := d.pos in
let d := p s d in
if d.hasError then d.resetPos iniPos else d
@[specialize] partial def manyAux (a : α) (p : ParserM σ δ μ α) : ParserM σ δ μ α
| str inp :=
match inp with
| ⟨Result.ok _ i₀ _ bst₀, _⟩ :=
(match p str inp with
| Result.ok _ i st bst := manyAux str (mkInput i st bst)
| Result.error _ _ st _ := Result.ok a i₀ st bst₀)
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
@[noinline] def noFirstTokenInfo (info : ParserInfo) : ParserInfo :=
{ updateTokens := info.updateTokens,
firstToken := none }
@[inline] def many (p : ParserM σ δ μ Unit) : ParserM σ δ μ Unit :=
manyAux () p
@[inline] def optionalFn (p : ParserFn) : ParserFn :=
λ s d,
let iniSz := d.stackSize in
let iniPos := d.pos in
let d := p s d in
let d := if d.hasError then d.restore iniSz iniPos else d in
d.mkNode nullKind iniSz
@[inline] def many1 (p : ParserM σ δ μ Unit) : ParserM σ δ μ Unit :=
p *> many p
def ParserData.mkError (d : ParserData) (msg : String) : ParserData :=
match d with
| ⟨stack, pos, cache, _⟩ := ⟨stack, pos, cache, some msg⟩
def pos : ParserM σ δ μ Pos :=
λ str inp,
match inp with
| ⟨Result.ok _ i st bst, _⟩ := Result.ok i i st bst
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
def ParserData.mkEOIError (d : ParserData) : ParserData :=
d.mkError "end of input"
def error {α : Type} (msg : String) : ParserM σ δ μ α :=
λ _ inp, mkError inp msg
@[specialize] partial def manyAux (p : ParserFn) : String → ParserData → ParserData
| s d :=
let iniSz := d.stackSize in
let iniPos := d.pos in
let d := p s d in
if d.hasError then d.restore iniSz iniPos
else if iniPos == d.pos then d.mkError "invalid 'many' parser combinator application, parser did not consume anything"
else manyAux s d
def errorAt {α : Type} (pos : Pos) (msg : String) : ParserM σ δ μ α :=
λ _ inp, match inp with
| ⟨Result.ok _ _ st _, _⟩ := Result.error msg pos st none
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
@[inline] def manyFn (p : ParserFn) : ParserFn
| s d :=
let iniSz := d.stackSize in
let d := manyAux p s d in
d.mkNode nullKind iniSz
def hexDigit : ParserM σ δ μ Nat
| str inp := match inp with
| ⟨Result.ok _ i st bst, _⟩ :=
if str.atEnd i then mkError inp "unexpected end of input"
else
let c := str.get i in
let i := str.next i in
if c.isDigit then Result.ok (c.toNat - '0'.toNat) i st bst
else if 'a' <= c && c <= 'f' then Result.ok (10 + c.toNat - 'a'.toNat) i st bst
else if 'A' <= c && c <= 'F' then Result.ok (10 + c.toNat - 'A'.toNat) i st bst
else mkError inp "invalid hexadecimal numeral, hexadecimal digit expected"
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
@[specialize] partial def satisfyFn (p : Char → Bool) (errorMsg : String := "unexpected character") : ParserFn
| s d :=
let i := d.pos in
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
if p c then d.next s i
else d.mkError errorMsg
def quotedChar : ParserM σ δ μ Char :=
do p ← pos,
c ← any,
if c = '\\' then pure '\\'
else if c = '\"' then pure '\"'
else if c = '\'' then pure '\''
else if c = 'n' then pure '\n'
else if c = 't' then pure '\t'
else if c = 'x' then do {
d₁ ← hexDigit,
d₂ ← hexDigit,
pure $ Char.ofNat (16*d₁ + d₂) }
else if c = 'u' then do {
d₁ ← hexDigit,
d₂ ← hexDigit,
d₃ ← hexDigit,
d₄ ← hexDigit,
pure $ Char.ofNat (16*(16*(16*d₁ + d₂) + d₃) + d₄) }
else errorAt p "invalid escape sequence"
@[specialize] partial def takeUntilFn (p : Char → Bool) : ParserFn
| s d :=
let i := d.pos in
if s.atEnd i then d
else
let c := s.get i in
if p c then d
else takeUntilFn s (d.next s i)
partial def strLitAux : String → ParserM σ δ μ String
| out := do
c ← any,
if c == '\"' then pure out
else if c == '\\' then do c ← quotedChar, strLitAux (out.push c)
else strLitAux (out.push c)
@[specialize] def takeWhileFn (p : Char → Bool) : ParserFn :=
takeUntilFn (λ c, !p c)
def strLit : ParserM σ δ μ String :=
satisfy (== '\"') "expected string literal" *> strLitAux ""
@[inline] def takeWhile1Fn (p : Char → Bool) (errorMsg : String) : ParserFn :=
andthenFn (satisfyFn p errorMsg) (takeWhileFn p)
partial def finishCommentBlock : Nat → ParserM σ δ μ Unit
| nesting str inp :=
match inp with
| ⟨Result.ok _ i st bst, _⟩ :=
if str.atEnd i then mkError inp "end of input"
else
let c := str.get i in
let i := str.next i in
partial def finishCommentBlock : Nat → ParserFn
| nesting s d :=
let i := d.pos in
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
let i := s.next i in
if c == '-' then
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
if c == '/' then -- "-/" end of comment
if nesting == 1 then d.next s i
else finishCommentBlock (nesting-1) s (d.next s i)
else
finishCommentBlock nesting s (d.next s i)
else if c == '/' then
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
if c == '-' then finishCommentBlock (nesting+1) s (d.next s i)
else finishCommentBlock nesting s (d.setPos i)
else finishCommentBlock nesting s (d.setPos i)
/- Consume whitespace and comments -/
partial def whitespace : ParserFn
| s d :=
let i := d.pos in
if s.atEnd i then d
else
let c := s.get i in
if c.isWhitespace then whitespace s (d.next s i)
else if c == '-' then
let i := s.next i in
let c := s.get i in
if c == '-' then andthenFn (takeUntilFn (= '\n')) whitespace s (d.next s i)
else d
else if c == '/' then
let i := s.next i in
let c := s.get i in
if c == '-' then
if str.atEnd i then mkError inp "end of input"
else
let c := str.get i in
if c == '/' then -- "-/" end of comment
if nesting == 1 then Result.ok () (str.next i) st bst
else finishCommentBlock (nesting-1) str (mkInput (str.next i) st bst)
else
finishCommentBlock nesting str (mkInput i st bst)
else if c == '/' then
if str.atEnd i then mkError inp "end of input"
else
let c := str.get i in
if c == '-' then finishCommentBlock (nesting+1) str (mkInput (str.next i) st bst)
else finishCommentBlock nesting str (mkInput i st bst)
else finishCommentBlock nesting str (mkInput i st bst)
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
let i := s.next i in
let c := s.get i in
if c == '-' then d -- "/--" doc comment is an actual token
else andthenFn (finishCommentBlock 1) whitespace s (d.next s i)
else d
else d
partial def leanWhitespace : ParserM σ δ μ Unit
| str inp :=
match inp with
| ⟨Result.ok _ i st bst, _⟩ :=
if str.atEnd i then inp.val
def mkEmptySubstringAt (s : String) (p : Nat) : Substring :=
{str := s, startPos := p, stopPos := p }
private def rawAux (startPos : Nat) (trailingWs : Bool) : ParserFn
| s d :=
let stopPos := d.pos in
let leading := mkEmptySubstringAt s startPos in
let val := s.extract startPos stopPos in
if trailingWs then
let d := whitespace s d in
let stopPos' := d.pos in
let trailing : Substring := { str := s, startPos := stopPos, stopPos := stopPos' } in
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
d.pushSyntax atom
else
let trailing := mkEmptySubstringAt s stopPos in
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
d.pushSyntax atom
/-- Match an arbitrary Parser and return the consumed String in a `Syntax.atom`. -/
@[inline] def rawFn (p : ParserFn) (trailingWs := false) : ParserFn
| s d :=
let startPos := d.pos in
let d := p s d in
if d.hasError then d else rawAux startPos trailingWs s d
def hexDigitFn : ParserFn
| s d :=
let i := d.pos in
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
let i := s.next i in
if c.isDigit || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') then d.setPos i
else d.mkError "invalid hexadecimal numeral, hexadecimal digit expected"
def quotedCharFn : ParserFn
| s d :=
let i := d.pos in
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
if c == '\\' || c == '\"' || c == '\'' || c == '\n' || c == '\t' then
d.next s i
else if c == 'x' then
andthenFn hexDigitFn hexDigitFn s (d.next s i)
else if c == 'u' then
andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) s (d.next s i)
else
let c := str.get i in
if c.isWhitespace then leanWhitespace str (mkInput (str.next i) st bst)
else if c == '-' then
let i := str.next i in
let c := str.get i in
if c == '-' then ((takeUntil (= '\n')) *> leanWhitespace) str (mkInput i st bst)
else inp.val
else if c == '/' then
let i := str.next i in
let c := str.get i in
if c == '-' then
let i := str.next i in
let c := str.get i in
if c == '-' then inp.val -- "/--" doc comment is an actual token
else ((finishCommentBlock 1) *> leanWhitespace) str (mkInput i st bst)
else inp.val
else inp.val
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
d.mkError "invalid escape sequence"
end ParserM
def mkStrLitKind : IO SyntaxNodeKind := nextKind `strLit
@[init mkStrLitKind] constant strLitKind : SyntaxNodeKind := default _
class monadParser (σ : outParam Type) (δ : outParam Type) (μ : outParam Type) (m : Type → Type) :=
(lift {} {α : Type} : ParserM σ δ μ α → m α)
(map {} {α : Type} : (Π β, ParserM σ δ μ β → ParserM σ δ μ β) → m α → m α)
/-- Push `(Syntax.node tk <new-atom>)` into syntax stack -/
def mkNodeToken (k : SyntaxNodeKind) (startPos : Nat) (s : String) (d : ParserData) : ParserData :=
let stopPos := d.pos in
let leading := mkEmptySubstringAt s startPos in
let val := s.extract startPos stopPos in
let d := whitespace s d in
let wsStopPos := d.pos in
let trailing := { Substring . str := s, startPos := stopPos, stopPos := wsStopPos } in
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
let tk := Syntax.node k (Array.singleton atom) [] in
d.pushSyntax tk
instance monadParserBase : monadParser σ δ μ (ParserM σ δ μ) :=
{ lift := λ α, id,
map := λ α f x, f α x }
partial def strLitFnAux (startPos : Nat) : ParserFn
| s d :=
let i := d.pos in
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
let d := d.setPos (s.next i) in
if c == '\"' then
mkNodeToken strLitKind startPos s d
else if c == '\\' then andthenFn quotedCharFn strLitFnAux s d
else strLitFnAux s d
instance monadParserTrans {m n : Type → Type} [HasMonadLift m n] [MonadFunctor m m n n] [monadParser σ δ μ m] : monadParser σ δ μ n :=
{ lift := λ α p, monadLift (monadParser.lift p : m α),
map := λ α f x, monadMap (λ β x, (monadParser.map @f x : m β)) x }
def mkNumberKind : IO SyntaxNodeKind := nextKind `number
@[init mkNumberKind] constant numberKind : SyntaxNodeKind := default _
class monadParserAux (σ : outParam Type) (δ : outParam Type) (μ : outParam Type) (m : Type → Type) :=
(map {} {α : Type} : (ParserM σ δ μ α → ParserM σ δ μ α) → m α → m α)
def decimalNumberFn (startPos : Nat) : ParserFn
| s d :=
let d := takeWhileFn (λ c, c.isDigit) s d in
let i := d.pos in
let c := s.get i in
let d :=
if c == '.' then
let i := s.next i in
let c := s.get i in
if c.isDigit then
takeWhileFn (λ c, c.isDigit) s (d.setPos i)
else
d
else
d in
mkNodeToken numberKind startPos s d
instance monadParserAuxBase : monadParserAux σ δ μ (ParserM σ δ μ) :=
{ map := λ α, id }
def binNumberFn (startPos : Nat) : ParserFn
| s d :=
let d := takeWhile1Fn (λ c, c == '0' || c == '1') "expected binary number" s d in
mkNodeToken numberKind startPos s d
instance monadParserAuxReader {m : Type → Type} {ρ : Type} [Monad m] [monadParserAux σ δ μ m] : monadParserAux σ δ μ (ReaderT ρ m) :=
{ map := λ α f x r, (monadParserAux.map f : m α → m α) (x r) }
def octalNumberFn (startPos : Nat) : ParserFn
| s d :=
let d := takeWhile1Fn (λ c, '0' ≤ c && c ≤ '7') "expected octal number" s d in
mkNodeToken numberKind startPos s d
section
variables {m : Type → Type} [monadParser σ δ μ m]
def hexNumberFn (startPos : Nat) : ParserFn
| s d :=
let d := takeWhile1Fn (λ c, ('0' ≤ c && c ≤ '9') || ('a' ≤ c && c ≤ 'f') || ('A' ≤ c && c ≤ 'F')) "expected hexadecimal number" s d in
mkNodeToken numberKind startPos s d
@[inline] def satisfy (p : Char → Bool) : m Char := monadParser.lift (ParserM.satisfy p)
def ch (c : Char) : m Char := monadParser.lift (ParserM.ch c)
def alpha : m Char := satisfy Char.isAlpha
def digit : m Char := satisfy Char.isDigit
def upper : m Char := satisfy Char.isUpper
def lower : m Char := satisfy Char.isLower
def any : m Char := satisfy (λ _, true)
def numberFnAux : ParserFn
| s d :=
let startPos := d.pos in
if s.atEnd startPos then d.mkEOIError
else
let c := s.get startPos in
if c == '0' then
let i := s.next startPos in
let c := s.get i in
if c == 'b' || c == 'B' then
binNumberFn startPos s (d.next s i)
else if c == 'o' || c == 'O' then
octalNumberFn startPos s (d.next s i)
else if c == 'x' || c == 'X' then
hexNumberFn startPos s (d.next s i)
else
decimalNumberFn startPos s (d.next s i)
else if c.isDigit then
decimalNumberFn startPos s (d.next s startPos)
else
d.mkError "expected numeral"
@[inline] def takeUntil (p : Char → Bool) : m Unit := monadParser.lift (ParserM.takeUntil p)
def isIdCont : String → ParserData → Bool
| s d :=
let i := d.pos in
let c := s.get i in
if c == '.' then
let i := s.next i in
if s.atEnd i then
false
else
let c := s.get i in
isIdFirst c || isIdBeginEscape c
else
false
@[inline] def str (s : String) : m Unit := monadParser.lift (ParserM.str s)
private def isToken (idStartPos idStopPos : Nat) (tk : Option TokenConfig) : Bool :=
match tk with
| none := false
| some tk :=
-- if a token is both a symbol and a valid identifier (i.e. a keyword),
-- we want it to be recognized as a symbol
tk.val.bsize ≥ idStopPos - idStopPos
@[inline] def try (p : m α) : m α :=
monadParser.map (λ _ p, ParserM.try p) p
@[inline] def lookahead (p : m α) : m α :=
monadParser.map (λ _ p, ParserM.lookahead p) p
def mkTokenAndFixPos (startPos : Nat) (tk : Option TokenConfig) (s : String) (d : ParserData) : ParserData :=
match tk with
| none :=
let d := d.setPos startPos in
d.mkError "token expected"
| some tk :=
let leading := mkEmptySubstringAt s startPos in
let val := tk.val in
let stopPos := startPos + val.bsize in
let d := d.setPos stopPos in
let wsStopPos := d.pos in
let trailing := { Substring . str := s, startPos := stopPos, stopPos := wsStopPos } in
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
d.pushSyntax atom
@[inline] def takeWhile (p : Char → Bool) : m Unit := takeUntil (λ c, !p c)
def mkIdResult (startPos : Nat) (tk : Option TokenConfig) (val : Name) (s : String) (d : ParserData) : ParserData :=
let stopPos := d.pos in
if isToken startPos stopPos tk then
mkTokenAndFixPos startPos tk s d
else
let rawVal : Substring := { str := s, startPos := startPos, stopPos := stopPos } in
let d := whitespace s d in
let trailingStopPos := d.pos in
let leading := mkEmptySubstringAt s startPos in
let trailing : Substring := { str := s, startPos := stopPos, stopPos := trailingStopPos } in
let info : SourceInfo := {leading := leading, trailing := trailing, pos := startPos} in
let atom := Syntax.ident (some info) rawVal val [] [] in
d.pushSyntax atom
@[inline] def whitespace : m Unit := takeWhile Char.isWhitespace
partial def identFnAux (startPos : Nat) (tk : Option TokenConfig) : Name → ParserFn
| r s d :=
let i := d.pos in
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
if isIdBeginEscape c then
let startPart := s.next i in
let d := takeUntilFn isIdEndEscape s (d.setPos startPart) in
let stopPart := d.pos in
let d := satisfyFn isIdEndEscape "end of escaped identifier expected" s d in
if d.hasError then d
else
let r := Name.mkString r (s.extract startPart stopPart) in
if isIdCont s d then
identFnAux r s d
else
mkIdResult startPos tk r s d
else if isIdFirst c then
let startPart := i in
let d := takeWhileFn isIdRest s (d.next s i) in
let stopPart := d.pos in
let r := Name.mkString r (s.extract startPart stopPart) in
if isIdCont s d then
identFnAux r s d
else
mkIdResult startPart tk r s d
else
mkTokenAndFixPos startPos tk s d
@[inline] def strLit : m String := monadParser.lift (ParserM.strLit)
structure AbsParser (ρ : Type) :=
(info : Thunk ParserInfo := Thunk.mk (λ _, {}))
(fn : ρ)
@[inline] def leanWhitespace : m Unit := monadParser.lift ParserM.leanWhitespace
end
abbrev Parser := AbsParser ParserFn
section
variables {m : Type → Type} [monadParserAux σ δ μ m]
class ParserFnLift (ρ : Type) :=
(lift {} : ParserFn → ρ)
(map : (ParserFn → ParserFn) → ρρ)
(map₂ : (ParserFn → ParserFn → ParserFn) → ρρρ)
@[inline] def many (p : m Unit) : m Unit := monadParserAux.map ParserM.many p
@[inline] def many1 (p : m Unit) : m Unit := monadParserAux.map ParserM.many1 p
instance parserLiftInhabited {ρ : Type} [ParserFnLift ρ] : Inhabited ρ :=
⟨ParserFnLift.lift (default _)⟩
end
instance idParserLift : ParserFnLift ParserFn :=
{ lift := λ p, p,
map := λ m p, m p,
map₂ := λ m p1 p2, m p1 p2 }
@[inline]
def liftParser {ρ : Type} [ParserFnLift ρ] (info : Thunk ParserInfo) (fn : ParserFn) : AbsParser ρ :=
{ info := info, fn := ParserFnLift.lift fn }
@[inline]
def mapParser {ρ : Type} [ParserFnLift ρ] (infoFn : ParserInfo → ParserInfo) (pFn : ParserFn → ParserFn) : AbsParser ρ → AbsParser ρ :=
λ p, { info := Thunk.mk (λ _, infoFn p.info.get), fn := ParserFnLift.map pFn p.fn }
@[inline]
def mapParser₂ {ρ : Type} [ParserFnLift ρ] (infoFn : ParserInfo → ParserInfo → ParserInfo) (pFn : ParserFn → ParserFn → ParserFn)
: AbsParser ρ → AbsParser ρ → AbsParser ρ :=
λ p q, { info := Thunk.mk (λ _, infoFn p.info.get q.info.get), fn := ParserFnLift.map₂ pFn p.fn q.fn }
def EnvParserFn (α : Type) (ρ : Type) :=
αρ
def RecParserFn (α ρ : Type) :=
EnvParserFn (αρ) ρ
instance envParserLift (α ρ : Type) [ParserFnLift ρ] : ParserFnLift (EnvParserFn α ρ) :=
{ lift := λ p a, ParserFnLift.lift p,
map := λ m p a, ParserFnLift.map m (p a),
map₂ := λ m p1 p2 a, ParserFnLift.map₂ m (p1 a) (p2 a) }
instance recParserLift (α ρ : Type) [ParserFnLift ρ] : ParserFnLift (RecParserFn α ρ) :=
inferInstanceAs (ParserFnLift (EnvParserFn (αρ) ρ))
namespace RecParserFn
variables {α ρ : Type}
@[inline] def recurse (a : α) : RecParserFn α ρ :=
λ p, p a
@[inline] def run [ParserFnLift ρ] (x : RecParserFn α ρ) (rec : α → RecParserFn α ρ) : ρ :=
x (fix (λ f a, rec a f))
end RecParserFn
@[inline] def andthen {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ → AbsParser ρ :=
mapParser₂ andthenInfo andthenFn
@[inline] def node {ρ : Type} [ParserFnLift ρ] (k : SyntaxNodeKind) : AbsParser ρ → AbsParser ρ :=
mapParser nodeInfo (nodeFn k)
@[inline] def orelse {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ → AbsParser ρ :=
mapParser₂ orelseInfo orelseFn
@[inline] def try {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ :=
mapParser noFirstTokenInfo tryFn
@[inline] def many {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ :=
mapParser noFirstTokenInfo manyFn
@[inline] def optional {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ :=
mapParser noFirstTokenInfo optionalFn
@[inline] def many1 {ρ : Type} [ParserFnLift ρ] (p : AbsParser ρ) : AbsParser ρ :=
andthen p (many p)
abbrev BasicParserFn : Type := EnvParserFn ParserConfig ParserFn
abbrev BasicParser : Type := AbsParser BasicParserFn
abbrev CmdParserFn (ρ : Type) : Type := EnvParserFn ρ (RecParserFn Unit ParserFn)
abbrev TermParserFn : Type := RecParserFn Nat (CmdParserFn ParserConfig)
abbrev TermParser : Type := AbsParser TermParserFn
abbrev TrailingTermParser : Type := AbsParser (EnvParserFn Syntax TermParserFn)
def TokenMap (α : Type) := RBMap Name (List α) Name.quickLt
structure CommandParserConfig extends ParserConfig :=
(leadingTermParsers : TokenMap TermParser)
(trailingTermParsers : TokenMap TrailingTermParser)
-- local Term parsers (such as from `local notation`) hide previous parsers instead of overloading them
(localLeadingTermParsers : TokenMap TermParser := mkRBMap _ _ _)
(localTrailingTermParsers : TokenMap TrailingTermParser := mkRBMap _ _ _)
abbrev CommandParser : Type := AbsParser (CmdParserFn CommandParserConfig)
@[inline] def Term.parser (rbp : Nat := 0) : TermParser :=
{ fn := RecParserFn.recurse rbp }
@[inline] def Command.parser : CommandParser :=
{ fn := λ _, RecParserFn.recurse () }
@[inline] def basicParser2TermParser (p : BasicParser) : TermParser :=
{ info := Thunk.mk (λ _, p.info.get), fn := λ _ cfg _, p.fn cfg }
instance basic2term : HasCoe BasicParser TermParser :=
⟨basicParser2TermParser⟩
@[inline] def basicParser2CmdParser (p : BasicParser) : CommandParser :=
{ info := Thunk.mk (λ _, p.info.get), fn := λ cfg _, p.fn cfg.toParserConfig }
instance basicmd : HasCoe BasicParser CommandParser :=
⟨basicParser2CmdParser⟩
private def tokenFnAux : BasicParserFn
| cfg s d :=
let i := d.pos in
let c := s.get i in
if c == '\"' then
strLitFnAux i s (d.next s i)
else if c.isDigit then
numberFnAux s d
else
let (_, tk) := cfg.tokens.matchPrefix s i in
identFnAux i tk Name.anonymous s d
private def updateCache (startPos : Nat) (d : ParserData) : ParserData :=
match d with
| ⟨stack, pos, cache, none⟩ :=
if stack.size == 0 then d
else
let tk := stack.back in
⟨stack, pos, { tokenCache := some { startPos := startPos, stopPos := pos, token := tk } }, none⟩
| other := other
def tokenFn : BasicParserFn
| cfg s d :=
let i := d.pos in
if s.atEnd i then d.mkEOIError
else
match d.cache with
| { tokenCache := some tkc } :=
if tkc.startPos == i then
let d := d.pushSyntax tkc.token in
d.setPos tkc.stopPos
else
let d := tokenFnAux cfg s d in
updateCache i d
| _ :=
let d := tokenFnAux cfg s d in
updateCache i d
def symbolFnAux (sym : String) (errorMsg : String) : BasicParserFn
| cfg s d :=
let d := tokenFn cfg s d in
if d.hasError then
d.mkError errorMsg
else
match d.stxStack.back with
| Syntax.atom _ sym' := if sym == sym' then d else d.mkError errorMsg
| _ := d.mkError errorMsg
@[inline] def symbolFn (sym : String) : BasicParserFn :=
symbolFnAux sym ("expected '" ++ sym ++ "'")
def symbolInfo (sym : String) (lbp : Nat) : ParserInfo :=
{ updateTokens := λ trie, trie.insert sym { val := sym, lbp := lbp },
firstToken := some { val := sym, lbp := lbp } }
@[inline] def symbol (sym : String) (lbp : Nat := 0) : BasicParser :=
{ info := Thunk.mk (λ _, symbolInfo sym lbp),
fn := symbolFn sym }
def numberFn : BasicParserFn
| cfg s d :=
let d := tokenFn cfg s d in
if d.hasError || !(d.stxStack.back.isOfKind numberKind) then
d.mkError "expected numeral"
else
d
@[inline] def number : BasicParser :=
{ fn := numberFn }
def strLitFn : BasicParserFn
| cfg s d :=
let d := tokenFn cfg s d in
if d.hasError || !(d.stxStack.back.isOfKind strLitKind) then
d.mkError "expected string literal"
else
d
@[inline] def strLit : BasicParser :=
{ fn := numberFn }
def mkFrontendConfig (filename input : String) : FrontendConfig :=
{ filename := filename,
input := input,
fileMap := input.toFileMap }
def BasicParser.run (p : BasicParser) (input : String) (filename : String := "<input>") : Except String Syntax :=
let frontendCfg := mkFrontendConfig filename input in
let tokens := p.info.get.updateTokens {} in
let cfg : ParserConfig := { tokens := tokens, .. frontendCfg } in
let d : ParserData := { stxStack := Array.empty, pos := 0, cache := {}, errorMsg := none } in
let d := p.fn cfg input d in
if d.hasError then
Except.error (d.toErrorMsg cfg)
else
Except.ok d.stxStack.back
end Parser
end Lean
-- end Lean

View file

@ -1,716 +0,0 @@
import init.lean.name init.lean.parser.trie init.lean.parser.identifier
import syntax filemap
open Lean
export Lean.Parser (Trie)
-- namespace Lean
namespace Parser
structure FrontendConfig :=
(filename : String)
(input : String)
(fileMap : FileMap)
structure TokenConfig :=
(val : String)
(lbp : Nat := 0)
namespace TokenConfig
def beq : TokenConfig → TokenConfig → Bool
| ⟨val₁, lbp₁⟩ ⟨val₂, lbp₂⟩ := val₁ == val₂ && lbp₁ == lbp₂
instance : HasBeq TokenConfig :=
⟨beq⟩
end TokenConfig
structure TokenCacheEntry :=
(startPos stopPos : String.Pos)
(token : Syntax)
structure ParserCache :=
(tokenCache : Option TokenCacheEntry := none)
structure ParserConfig extends FrontendConfig :=
(tokens : Trie TokenConfig)
structure ParserData :=
(stxStack : Array Syntax) (pos : String.Pos) (cache : ParserCache) (errorMsg : Option String)
@[inline] def ParserData.hasError (d : ParserData) : Bool :=
d.errorMsg != none
@[inline] def ParserData.stackSize (d : ParserData) : Nat :=
d.stxStack.size
def ParserData.restore (d : ParserData) (iniStackSz : Nat) (iniPos : Nat) : ParserData :=
match d with
| ⟨stack, _, cache, _⟩ := ⟨stack.shrink iniStackSz, iniPos, cache, none⟩
def ParserData.setPos (d : ParserData) (pos : Nat) : ParserData :=
match d with
| ⟨stack, _, cache, msg⟩ := ⟨stack, pos, cache, msg⟩
def ParserData.setCache (d : ParserData) (cache : ParserCache) : ParserData :=
match d with
| ⟨stack, pos, _, msg⟩ := ⟨stack, pos, cache, msg⟩
def ParserData.pushSyntax (d : ParserData) (n : Syntax) : ParserData :=
match d with
| ⟨stack, pos, cache, msg⟩ := ⟨stack.push n, pos, cache, msg⟩
def ParserData.next (d : ParserData) (s : String) (pos : Nat) : ParserData :=
d.setPos (s.next pos)
def ParserData.toErrorMsg (d : ParserData) (cfg : ParserConfig) : String :=
match d.errorMsg with
| none := ""
| some msg :=
let pos := cfg.fileMap.toPosition d.pos in
cfg.filename ++ ":" ++ toString pos.line ++ ":" ++ toString pos.column ++ " " ++ msg
def ParserFn := String → ParserData → ParserData
instance : Inhabited ParserFn :=
⟨λ s, id⟩
structure ParserInfo :=
(updateTokens : Trie TokenConfig → Trie TokenConfig := λ tks, tks)
(firstToken : Option TokenConfig := none)
@[inline] def andthenFn (p q : ParserFn) : ParserFn
| s d :=
let d := p s d in
if d.hasError then d else q s d
@[noinline] def andthenInfo (p q : ParserInfo) : ParserInfo :=
{ updateTokens := q.updateTokens ∘ p.updateTokens,
firstToken := p.firstToken }
def ParserData.mkNode (d : ParserData) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserData :=
match d with
| ⟨stack, pos, cache, err⟩ :=
if err != none && stack.size == iniStackSz then
-- If there is an error but there are no new nodes on the stack, we just return `d`
d
else
let newNode := Syntax.node k (stack.extract iniStackSz stack.size) [] in
let stack := stack.shrink iniStackSz in
let stack := stack.push newNode in
⟨stack, pos, cache, err⟩
@[inline] def nodeFn (k : SyntaxNodeKind) (p : ParserFn) : ParserFn
| s d :=
let iniSz := d.stackSize in
let d := p s d in
d.mkNode k iniSz
@[noinline] def nodeInfo (p : ParserInfo) : ParserInfo :=
{ updateTokens := p.updateTokens,
firstToken := p.firstToken }
@[inline] def orelseFn (p q : ParserFn) : ParserFn
| s d :=
let iniSz := d.stackSize in
let iniPos := d.pos in
let d := p s d in
if d.hasError && d.pos == iniPos then q s (d.restore iniSz iniPos) else d
@[noinline] def orelseInfo (p q : ParserInfo) : ParserInfo :=
{ updateTokens := q.updateTokens ∘ p.updateTokens,
firstToken :=
match p.firstToken, q.firstToken with
| some tk₁, some tk₂ := if tk₁ == tk₂ then some tk₁ else none
| _, _ := none }
def ParserData.resetPos : ParserData → String.Pos → ParserData
| ⟨stack, _, cache, errorMsg⟩ pos := ⟨stack, pos, cache, errorMsg⟩
@[inline] def tryFn (p : ParserFn) : ParserFn
| s d :=
let iniPos := d.pos in
let d := p s d in
if d.hasError then d.resetPos iniPos else d
@[noinline] def noFirstTokenInfo (info : ParserInfo) : ParserInfo :=
{ updateTokens := info.updateTokens,
firstToken := none }
@[inline] def optionalFn (p : ParserFn) : ParserFn :=
λ s d,
let iniSz := d.stackSize in
let iniPos := d.pos in
let d := p s d in
let d := if d.hasError then d.restore iniSz iniPos else d in
d.mkNode nullKind iniSz
def ParserData.mkError (d : ParserData) (msg : String) : ParserData :=
match d with
| ⟨stack, pos, cache, _⟩ := ⟨stack, pos, cache, some msg⟩
def ParserData.mkEOIError (d : ParserData) : ParserData :=
d.mkError "end of input"
@[specialize] partial def manyAux (p : ParserFn) : String → ParserData → ParserData
| s d :=
let iniSz := d.stackSize in
let iniPos := d.pos in
let d := p s d in
if d.hasError then d.restore iniSz iniPos
else if iniPos == d.pos then d.mkError "invalid 'many' parser combinator application, parser did not consume anything"
else manyAux s d
@[inline] def manyFn (p : ParserFn) : ParserFn
| s d :=
let iniSz := d.stackSize in
let d := manyAux p s d in
d.mkNode nullKind iniSz
@[specialize] partial def satisfyFn (p : Char → Bool) (errorMsg : String := "unexpected character") : ParserFn
| s d :=
let i := d.pos in
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
if p c then d.next s i
else d.mkError errorMsg
@[specialize] partial def takeUntilFn (p : Char → Bool) : ParserFn
| s d :=
let i := d.pos in
if s.atEnd i then d
else
let c := s.get i in
if p c then d
else takeUntilFn s (d.next s i)
@[specialize] def takeWhileFn (p : Char → Bool) : ParserFn :=
takeUntilFn (λ c, !p c)
@[inline] def takeWhile1Fn (p : Char → Bool) (errorMsg : String) : ParserFn :=
andthenFn (satisfyFn p errorMsg) (takeWhileFn p)
partial def finishCommentBlock : Nat → ParserFn
| nesting s d :=
let i := d.pos in
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
let i := s.next i in
if c == '-' then
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
if c == '/' then -- "-/" end of comment
if nesting == 1 then d.next s i
else finishCommentBlock (nesting-1) s (d.next s i)
else
finishCommentBlock nesting s (d.next s i)
else if c == '/' then
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
if c == '-' then finishCommentBlock (nesting+1) s (d.next s i)
else finishCommentBlock nesting s (d.setPos i)
else finishCommentBlock nesting s (d.setPos i)
/- Consume whitespace and comments -/
partial def whitespace : ParserFn
| s d :=
let i := d.pos in
if s.atEnd i then d
else
let c := s.get i in
if c.isWhitespace then whitespace s (d.next s i)
else if c == '-' then
let i := s.next i in
let c := s.get i in
if c == '-' then andthenFn (takeUntilFn (= '\n')) whitespace s (d.next s i)
else d
else if c == '/' then
let i := s.next i in
let c := s.get i in
if c == '-' then
let i := s.next i in
let c := s.get i in
if c == '-' then d -- "/--" doc comment is an actual token
else andthenFn (finishCommentBlock 1) whitespace s (d.next s i)
else d
else d
def mkEmptySubstringAt (s : String) (p : Nat) : Substring :=
{str := s, startPos := p, stopPos := p }
private def rawAux (startPos : Nat) (trailingWs : Bool) : ParserFn
| s d :=
let stopPos := d.pos in
let leading := mkEmptySubstringAt s startPos in
let val := s.extract startPos stopPos in
if trailingWs then
let d := whitespace s d in
let stopPos' := d.pos in
let trailing : Substring := { str := s, startPos := stopPos, stopPos := stopPos' } in
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
d.pushSyntax atom
else
let trailing := mkEmptySubstringAt s stopPos in
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
d.pushSyntax atom
/-- Match an arbitrary Parser and return the consumed String in a `Syntax.atom`. -/
@[inline] def rawFn (p : ParserFn) (trailingWs := false) : ParserFn
| s d :=
let startPos := d.pos in
let d := p s d in
if d.hasError then d else rawAux startPos trailingWs s d
def hexDigitFn : ParserFn
| s d :=
let i := d.pos in
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
let i := s.next i in
if c.isDigit || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') then d.setPos i
else d.mkError "invalid hexadecimal numeral, hexadecimal digit expected"
def quotedCharFn : ParserFn
| s d :=
let i := d.pos in
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
if c == '\\' || c == '\"' || c == '\'' || c == '\n' || c == '\t' then
d.next s i
else if c == 'x' then
andthenFn hexDigitFn hexDigitFn s (d.next s i)
else if c == 'u' then
andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) s (d.next s i)
else
d.mkError "invalid escape sequence"
def mkStrLitKind : IO SyntaxNodeKind := nextKind `strLit
@[init mkStrLitKind] constant strLitKind : SyntaxNodeKind := default _
/-- Push `(Syntax.node tk <new-atom>)` into syntax stack -/
def mkNodeToken (k : SyntaxNodeKind) (startPos : Nat) (s : String) (d : ParserData) : ParserData :=
let stopPos := d.pos in
let leading := mkEmptySubstringAt s startPos in
let val := s.extract startPos stopPos in
let d := whitespace s d in
let wsStopPos := d.pos in
let trailing := { Substring . str := s, startPos := stopPos, stopPos := wsStopPos } in
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
let tk := Syntax.node k (Array.singleton atom) [] in
d.pushSyntax tk
partial def strLitFnAux (startPos : Nat) : ParserFn
| s d :=
let i := d.pos in
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
let d := d.setPos (s.next i) in
if c == '\"' then
mkNodeToken strLitKind startPos s d
else if c == '\\' then andthenFn quotedCharFn strLitFnAux s d
else strLitFnAux s d
def mkNumberKind : IO SyntaxNodeKind := nextKind `number
@[init mkNumberKind] constant numberKind : SyntaxNodeKind := default _
def decimalNumberFn (startPos : Nat) : ParserFn
| s d :=
let d := takeWhileFn (λ c, c.isDigit) s d in
let i := d.pos in
let c := s.get i in
let d :=
if c == '.' then
let i := s.next i in
let c := s.get i in
if c.isDigit then
takeWhileFn (λ c, c.isDigit) s (d.setPos i)
else
d
else
d in
mkNodeToken numberKind startPos s d
def binNumberFn (startPos : Nat) : ParserFn
| s d :=
let d := takeWhile1Fn (λ c, c == '0' || c == '1') "expected binary number" s d in
mkNodeToken numberKind startPos s d
def octalNumberFn (startPos : Nat) : ParserFn
| s d :=
let d := takeWhile1Fn (λ c, '0' ≤ c && c ≤ '7') "expected octal number" s d in
mkNodeToken numberKind startPos s d
def hexNumberFn (startPos : Nat) : ParserFn
| s d :=
let d := takeWhile1Fn (λ c, ('0' ≤ c && c ≤ '9') || ('a' ≤ c && c ≤ 'f') || ('A' ≤ c && c ≤ 'F')) "expected hexadecimal number" s d in
mkNodeToken numberKind startPos s d
def numberFnAux : ParserFn
| s d :=
let startPos := d.pos in
if s.atEnd startPos then d.mkEOIError
else
let c := s.get startPos in
if c == '0' then
let i := s.next startPos in
let c := s.get i in
if c == 'b' || c == 'B' then
binNumberFn startPos s (d.next s i)
else if c == 'o' || c == 'O' then
octalNumberFn startPos s (d.next s i)
else if c == 'x' || c == 'X' then
hexNumberFn startPos s (d.next s i)
else
decimalNumberFn startPos s (d.next s i)
else if c.isDigit then
decimalNumberFn startPos s (d.next s startPos)
else
d.mkError "expected numeral"
def isIdCont : String → ParserData → Bool
| s d :=
let i := d.pos in
let c := s.get i in
if c == '.' then
let i := s.next i in
if s.atEnd i then
false
else
let c := s.get i in
isIdFirst c || isIdBeginEscape c
else
false
private def isToken (idStartPos idStopPos : Nat) (tk : Option TokenConfig) : Bool :=
match tk with
| none := false
| some tk :=
-- if a token is both a symbol and a valid identifier (i.e. a keyword),
-- we want it to be recognized as a symbol
tk.val.bsize ≥ idStopPos - idStopPos
def mkTokenAndFixPos (startPos : Nat) (tk : Option TokenConfig) (s : String) (d : ParserData) : ParserData :=
match tk with
| none :=
let d := d.setPos startPos in
d.mkError "token expected"
| some tk :=
let leading := mkEmptySubstringAt s startPos in
let val := tk.val in
let stopPos := startPos + val.bsize in
let d := d.setPos stopPos in
let wsStopPos := d.pos in
let trailing := { Substring . str := s, startPos := stopPos, stopPos := wsStopPos } in
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
d.pushSyntax atom
def mkIdResult (startPos : Nat) (tk : Option TokenConfig) (val : Name) (s : String) (d : ParserData) : ParserData :=
let stopPos := d.pos in
if isToken startPos stopPos tk then
mkTokenAndFixPos startPos tk s d
else
let rawVal : Substring := { str := s, startPos := startPos, stopPos := stopPos } in
let d := whitespace s d in
let trailingStopPos := d.pos in
let leading := mkEmptySubstringAt s startPos in
let trailing : Substring := { str := s, startPos := stopPos, stopPos := trailingStopPos } in
let info : SourceInfo := {leading := leading, trailing := trailing, pos := startPos} in
let atom := Syntax.ident (some info) rawVal val [] [] in
d.pushSyntax atom
partial def identFnAux (startPos : Nat) (tk : Option TokenConfig) : Name → ParserFn
| r s d :=
let i := d.pos in
if s.atEnd i then d.mkEOIError
else
let c := s.get i in
if isIdBeginEscape c then
let startPart := s.next i in
let d := takeUntilFn isIdEndEscape s (d.setPos startPart) in
let stopPart := d.pos in
let d := satisfyFn isIdEndEscape "end of escaped identifier expected" s d in
if d.hasError then d
else
let r := Name.mkString r (s.extract startPart stopPart) in
if isIdCont s d then
identFnAux r s d
else
mkIdResult startPos tk r s d
else if isIdFirst c then
let startPart := i in
let d := takeWhileFn isIdRest s (d.next s i) in
let stopPart := d.pos in
let r := Name.mkString r (s.extract startPart stopPart) in
if isIdCont s d then
identFnAux r s d
else
mkIdResult startPart tk r s d
else
mkTokenAndFixPos startPos tk s d
structure AbsParser (ρ : Type) :=
(info : Thunk ParserInfo := Thunk.mk (λ _, {}))
(fn : ρ)
abbrev Parser := AbsParser ParserFn
class ParserFnLift (ρ : Type) :=
(lift {} : ParserFn → ρ)
(map : (ParserFn → ParserFn) → ρρ)
(map₂ : (ParserFn → ParserFn → ParserFn) → ρρρ)
instance parserLiftInhabited {ρ : Type} [ParserFnLift ρ] : Inhabited ρ :=
⟨ParserFnLift.lift (default _)⟩
instance idParserLift : ParserFnLift ParserFn :=
{ lift := λ p, p,
map := λ m p, m p,
map₂ := λ m p1 p2, m p1 p2 }
@[inline]
def liftParser {ρ : Type} [ParserFnLift ρ] (info : Thunk ParserInfo) (fn : ParserFn) : AbsParser ρ :=
{ info := info, fn := ParserFnLift.lift fn }
@[inline]
def mapParser {ρ : Type} [ParserFnLift ρ] (infoFn : ParserInfo → ParserInfo) (pFn : ParserFn → ParserFn) : AbsParser ρ → AbsParser ρ :=
λ p, { info := Thunk.mk (λ _, infoFn p.info.get), fn := ParserFnLift.map pFn p.fn }
@[inline]
def mapParser₂ {ρ : Type} [ParserFnLift ρ] (infoFn : ParserInfo → ParserInfo → ParserInfo) (pFn : ParserFn → ParserFn → ParserFn)
: AbsParser ρ → AbsParser ρ → AbsParser ρ :=
λ p q, { info := Thunk.mk (λ _, infoFn p.info.get q.info.get), fn := ParserFnLift.map₂ pFn p.fn q.fn }
def EnvParserFn (α : Type) (ρ : Type) :=
αρ
def RecParserFn (α ρ : Type) :=
EnvParserFn (αρ) ρ
instance envParserLift (α ρ : Type) [ParserFnLift ρ] : ParserFnLift (EnvParserFn α ρ) :=
{ lift := λ p a, ParserFnLift.lift p,
map := λ m p a, ParserFnLift.map m (p a),
map₂ := λ m p1 p2 a, ParserFnLift.map₂ m (p1 a) (p2 a) }
instance recParserLift (α ρ : Type) [ParserFnLift ρ] : ParserFnLift (RecParserFn α ρ) :=
inferInstanceAs (ParserFnLift (EnvParserFn (αρ) ρ))
namespace RecParserFn
variables {α ρ : Type}
@[inline] def recurse (a : α) : RecParserFn α ρ :=
λ p, p a
@[inline] def run [ParserFnLift ρ] (x : RecParserFn α ρ) (rec : α → RecParserFn α ρ) : ρ :=
x (fix (λ f a, rec a f))
end RecParserFn
@[inline] def andthen {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ → AbsParser ρ :=
mapParser₂ andthenInfo andthenFn
@[inline] def node {ρ : Type} [ParserFnLift ρ] (k : SyntaxNodeKind) : AbsParser ρ → AbsParser ρ :=
mapParser nodeInfo (nodeFn k)
@[inline] def orelse {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ → AbsParser ρ :=
mapParser₂ orelseInfo orelseFn
@[inline] def try {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ :=
mapParser noFirstTokenInfo tryFn
@[inline] def many {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ :=
mapParser noFirstTokenInfo manyFn
@[inline] def optional {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ :=
mapParser noFirstTokenInfo optionalFn
@[inline] def many1 {ρ : Type} [ParserFnLift ρ] (p : AbsParser ρ) : AbsParser ρ :=
andthen p (many p)
abbrev BasicParserFn : Type := EnvParserFn ParserConfig ParserFn
abbrev BasicParser : Type := AbsParser BasicParserFn
abbrev CmdParserFn (ρ : Type) : Type := EnvParserFn ρ (RecParserFn Unit ParserFn)
abbrev TermParserFn : Type := RecParserFn Nat (CmdParserFn ParserConfig)
abbrev TermParser : Type := AbsParser TermParserFn
abbrev TrailingTermParser : Type := AbsParser (EnvParserFn Syntax TermParserFn)
def TokenMap (α : Type) := RBMap Name (List α) Name.quickLt
structure CommandParserConfig extends ParserConfig :=
(leadingTermParsers : TokenMap TermParser)
(trailingTermParsers : TokenMap TrailingTermParser)
-- local Term parsers (such as from `local notation`) hide previous parsers instead of overloading them
(localLeadingTermParsers : TokenMap TermParser := mkRBMap _ _ _)
(localTrailingTermParsers : TokenMap TrailingTermParser := mkRBMap _ _ _)
abbrev CommandParser : Type := AbsParser (CmdParserFn CommandParserConfig)
@[inline] def Term.parser (rbp : Nat := 0) : TermParser :=
{ fn := RecParserFn.recurse rbp }
@[inline] def Command.parser : CommandParser :=
{ fn := λ _, RecParserFn.recurse () }
@[inline] def basicParser2TermParser (p : BasicParser) : TermParser :=
{ info := Thunk.mk (λ _, p.info.get), fn := λ _ cfg _, p.fn cfg }
instance basic2term : HasCoe BasicParser TermParser :=
⟨basicParser2TermParser⟩
@[inline] def basicParser2CmdParser (p : BasicParser) : CommandParser :=
{ info := Thunk.mk (λ _, p.info.get), fn := λ cfg _, p.fn cfg.toParserConfig }
instance basicmd : HasCoe BasicParser CommandParser :=
⟨basicParser2CmdParser⟩
private def tokenFnAux : BasicParserFn
| cfg s d :=
let i := d.pos in
let c := s.get i in
if c == '\"' then
strLitFnAux i s (d.next s i)
else if c.isDigit then
numberFnAux s d
else
let (_, tk) := cfg.tokens.matchPrefix s i in
identFnAux i tk Name.anonymous s d
private def updateCache (startPos : Nat) (d : ParserData) : ParserData :=
match d with
| ⟨stack, pos, cache, none⟩ :=
if stack.size == 0 then d
else
let tk := stack.back in
⟨stack, pos, { tokenCache := some { startPos := startPos, stopPos := pos, token := tk } }, none⟩
| other := other
def tokenFn : BasicParserFn
| cfg s d :=
let i := d.pos in
if s.atEnd i then d.mkEOIError
else
match d.cache with
| { tokenCache := some tkc } :=
if tkc.startPos == i then
let d := d.pushSyntax tkc.token in
d.setPos tkc.stopPos
else
let d := tokenFnAux cfg s d in
updateCache i d
| _ :=
let d := tokenFnAux cfg s d in
updateCache i d
def symbolFnAux (sym : String) (errorMsg : String) : BasicParserFn
| cfg s d :=
let d := tokenFn cfg s d in
if d.hasError then
d.mkError errorMsg
else
match d.stxStack.back with
| Syntax.atom _ sym' := if sym == sym' then d else d.mkError errorMsg
| _ := d.mkError errorMsg
@[inline] def symbolFn (sym : String) : BasicParserFn :=
symbolFnAux sym ("expected '" ++ sym ++ "'")
def symbolInfo (sym : String) (lbp : Nat) : ParserInfo :=
{ updateTokens := λ trie, trie.insert sym { val := sym, lbp := lbp },
firstToken := some { val := sym, lbp := lbp } }
@[inline] def symbol (sym : String) (lbp : Nat := 0) : BasicParser :=
{ info := Thunk.mk (λ _, symbolInfo sym lbp),
fn := symbolFn sym }
def numberFn : BasicParserFn
| cfg s d :=
let d := tokenFn cfg s d in
if d.hasError || !(d.stxStack.back.isOfKind numberKind) then
d.mkError "expected numeral"
else
d
@[inline] def number : BasicParser :=
{ fn := numberFn }
def strLitFn : BasicParserFn
| cfg s d :=
let d := tokenFn cfg s d in
if d.hasError || !(d.stxStack.back.isOfKind strLitKind) then
d.mkError "expected string literal"
else
d
@[inline] def strLit : BasicParser :=
{ fn := numberFn }
def mkFrontendConfig (filename input : String) : FrontendConfig :=
{ filename := filename,
input := input,
fileMap := input.toFileMap }
def BasicParser.run (p : BasicParser) (input : String) (filename : String := "<input>") : Except String Syntax :=
let frontendCfg := mkFrontendConfig filename input in
let tokens := p.info.get.updateTokens {} in
let cfg : ParserConfig := { tokens := tokens, .. frontendCfg } in
let d : ParserData := { stxStack := Array.empty, pos := 0, cache := {}, errorMsg := none } in
let d := p.fn cfg input d in
if d.hasError then
Except.error (d.toErrorMsg cfg)
else
Except.ok d.stxStack.back
local infix ` ; `:10 := Parser.andthen
local infix ` || `:5 := Parser.orelse
def mkTestKind : IO SyntaxNodeKind := nextKind `test
@[init mkTestKind] constant testKind : SyntaxNodeKind := default _
set_option pp.implicit true
set_option pp.binder_types false
set_option trace.compiler.stage2 true
set_option trace.compiler.boxed true
-- set_option trace.compiler.stage1 true
-- set_option trace.compiler.lcnf true
-- set_option trace.compiler.lcnf true
-- set_option trace.compiler.simp true
@[inline2]
def p0 : BasicParser :=
node testKind (symbol "foo"; many (symbol "boo"))
@[inline2]
def p1 (s : String) : TermParser :=
symbol "hello"; symbol "world"; symbol "boo"
||
symbol s
||
symbol "opt3"; symbol "boo"
@[inline2]
def p2 (s : String) : TermParser :=
-- symbol "boo"; p1; p1; p0
p1 "hello"; p0; p1 s
@[inline2]
def p3 (s : String) : TermParser :=
p1 s
||
p2 s
||
symbol "boo"; p2 s
@[inline2]
def p4 (s : String) : CommandParser :=
symbol s; symbol "boo"
end Parser
-- end Lean

View file

@ -236,6 +236,25 @@ partial def reprint : Syntax → Option String
else args.mfoldl "" (λ stx r, do s ← reprint stx, pure $ r ++ s)
| missing := ""
open Lean.Format
protected partial def toFormat : Syntax → Format
| (atom info val) := toFmt $ repr val
| (ident _ _ val pre scopes) :=
let scopes := pre.map toFmt ++ scopes.reverse.map toFmt in
let scopes := match scopes with [] := toFmt "" | _ := bracket "{" (joinSep scopes ", ") "}" in
toFmt "`" ++ toFmt val ++ scopes
| (node kind args scopes) :=
let scopes := match scopes with [] := toFmt "" | _ := bracket "{" (joinSep scopes.reverse ", ") "}" in
if kind.name = `Lean.Parser.noKind then
sbracket $ scopes ++ joinSep (args.toList.map toFormat) line
else
let shorterName := kind.name.replacePrefix `Lean.Parser Name.anonymous in
paren $ joinSep ((toFmt shorterName ++ scopes) :: args.toList.map toFormat) line
| missing := "<missing>"
instance : HasToFormat Syntax := ⟨Syntax.toFormat⟩
instance : HasToString Syntax := ⟨toString ∘ toFmt⟩
end Syntax
end Parser

View file

@ -1,59 +1,28 @@
import parser
open Parser
local infix ` ; `:10 := Parser.andthen
local infix ` || `:5 := Parser.orelse
abbrev Parser (α : Type) := ReaderT Nat (Lean.Parser.ParserM Unit Unit Unit) α
def mkTestKind : IO SyntaxNodeKind := nextKind `test
@[init mkTestKind] constant testKind : SyntaxNodeKind := default _
open Lean.Parser
@[inline2]
def numPair : BasicParser :=
node testKind $
symbol "("; number; symbol ","; number; symbol ")"
-- setOption pp.implicit True
-- setOption Trace.Compiler.boxed True
def numPairs : BasicParser :=
many numPair
def testP : Parser Unit :=
many1 (str "++" <|> str "**" <|> (str "--" *> takeUntil (= '\n') *> any *> pure ()))
@[noinline] def test (p : BasicParser) (s : String) : IO Unit :=
match p.run s with
| Except.error msg := IO.println "error"
| Except.ok stx := IO.println "ok"
def testP2 : Parser Unit :=
many1 ((strLit *> whitespace *> pure ()) <|> (str "--" *> takeUntil (= '\n') *> any *> pure ()))
def testP3 : Parser Unit :=
leanWhitespace
def testParser (x : Parser Unit) (input : String) : String :=
match (x 0).run () () input with
| Lean.Parser.Result.ok _ i _ _ := "Ok at " ++ toString i
| Result.error msg i _ _ := "Error at " ++ toString i ++ ": " ++ msg
def IO.testParser {α : Type} [HasToString α] (x : Parser α) (input : String) : IO Unit :=
match (x 0).run () () input with
| Lean.Parser.Result.ok a _ _ _ := IO.println ("Ok " ++ toString a)
| _ := throw "ERROR"
@[noinline] def test (p : Parser Unit) (s : String) : IO Unit :=
IO.println (testParser p s)
def mkBigString : Nat → String → String
def mkNumPairString : Nat → String → String
| 0 s := s
| (n+1) s := mkBigString n (s ++ "-- new comment\n")
def mkBigString2 : Nat → String → String
| 0 s := s
| (n+1) s := mkBigString2 n (s ++ "\"hello\\nworld\"\n-- comment\n")
def mkBigString3 : Nat → String → String
| 0 s := s
| (n+1) s := mkBigString3 n (s ++ "/- /- comment 1 -/ -/ \n -- comment 2 \n \t \n ")
def prof {α : Type} (msg : String) (p : IO α) : IO α :=
let msg₁ := "Time for '" ++ msg ++ "':" in
let msg₂ := "Memory usage for '" ++ msg ++ "':" in
allocprof msg₂ (timeit msg₁ p)
| (n+1) s := mkNumPairString n (s ++ "(" ++ toString n ++ ", " ++ toString n ++ ") -- comment\n")
def main (xs : List String) : IO Unit :=
do
let s₁ := mkBigString xs.head.toNat "",
let s₂ := s₁ ++ "bad" ++ mkBigString 20 "",
let s₃ := mkBigString2 xs.head.toNat "",
let s₄ := mkBigString3 xs.head.toNat "",
IO.println s₄.length,
prof "Parser 1" (test testP s₁),
prof "Parser 2" (test testP s₂),
prof "Parser 3" (test testP2 s₃),
prof "Parser 4" (test testP3 s₄)
let s := mkNumPairString xs.head.toNat "" in
test numPairs s