test(tests/playground/parser): replace parser.lean with parser2.lean
This commit is contained in:
parent
87690217c6
commit
e53cb81255
5 changed files with 645 additions and 1039 deletions
|
|
@ -1,7 +1,7 @@
|
|||
LEANC=../../../bin/leanc
|
||||
LEAN=../../../bin/lean
|
||||
COMMON_SRCS = parser.lean syntax.lean filemap.lean
|
||||
SRCS = $(COMMON_SRCS) test1.lean test2.lean
|
||||
SRCS = $(COMMON_SRCS) test1.lean
|
||||
OLEANS = $(SRCS:.lean=.olean)
|
||||
CPPS = $(SRCS:.lean=.cpp)
|
||||
COMMON_OBJS = $(COMMON_SRCS:.lean=.o)
|
||||
|
|
@ -31,8 +31,8 @@ depends: $(DEPS)
|
|||
test1: test1.o $(COMMON_OBJS)
|
||||
$(LEANC) -o $@ $^
|
||||
|
||||
test2: test2.o $(COMMON_OBJS)
|
||||
$(LEANC) -o $@ $^
|
||||
# test2: test2.o $(COMMON_OBJS)
|
||||
# $(LEANC) -o $@ $^
|
||||
|
||||
clean:
|
||||
rm -f *.olean
|
||||
|
|
|
|||
|
|
@ -1,337 +1,671 @@
|
|||
namespace Lean
|
||||
import init.lean.name init.lean.parser.trie init.lean.parser.identifier
|
||||
import syntax filemap
|
||||
|
||||
open Lean
|
||||
export Lean.Parser (Trie)
|
||||
-- namespace Lean
|
||||
namespace Parser
|
||||
|
||||
abbrev Pos := String.Pos
|
||||
structure FrontendConfig :=
|
||||
(filename : String)
|
||||
(input : String)
|
||||
(fileMap : FileMap)
|
||||
|
||||
/-
|
||||
σ is the non-backtrackable State
|
||||
δ is the backtrackable State
|
||||
μ is the extra error Message data
|
||||
-/
|
||||
inductive Result (σ δ μ α : Type)
|
||||
| ok {} (a : α) (i : Pos) (st : σ) (bst : δ) : Result
|
||||
| error {} (msg : String) (i : Pos) (st : σ) (extra : Option μ) : Result
|
||||
structure TokenConfig :=
|
||||
(val : String)
|
||||
(lbp : Nat := 0)
|
||||
|
||||
inductive Result.IsOk {σ δ μ α : Type} : Result σ δ μ α → Prop
|
||||
| mk (a : α) (i : Pos) (st : σ) (bst : δ) : Result.IsOk (Result.ok a i st bst)
|
||||
namespace TokenConfig
|
||||
|
||||
theorem errorIsNotOk {σ δ μ α : Type} {msg : String} {i : Pos} {st : σ} {extra : Option μ}
|
||||
(h : Result.IsOk (@Result.error σ δ μ α msg i st extra)) : False :=
|
||||
match h with end
|
||||
def beq : TokenConfig → TokenConfig → Bool
|
||||
| ⟨val₁, lbp₁⟩ ⟨val₂, lbp₂⟩ := val₁ == val₂ && lbp₁ == lbp₂
|
||||
|
||||
@[inline] def unreachableError {σ δ μ α β : Type} {msg : String} {i : Pos} {st : σ} {extra : Option μ}
|
||||
(h : Result.IsOk (@Result.error σ δ μ α msg i st extra)) : β :=
|
||||
False.elim (errorIsNotOk h)
|
||||
instance : HasBeq TokenConfig :=
|
||||
⟨beq⟩
|
||||
|
||||
def input (σ δ μ : Type) : Type := { r : Result σ δ μ Unit // r.IsOk }
|
||||
end TokenConfig
|
||||
|
||||
@[inline] def mkInput {σ δ μ : Type} (i : Pos) (st : σ) (bst : δ) : input σ δ μ :=
|
||||
⟨Result.ok () i st bst, Result.IsOk.mk _ _ _ _⟩
|
||||
structure TokenCacheEntry :=
|
||||
(startPos stopPos : String.Pos)
|
||||
(token : Syntax)
|
||||
|
||||
def ParserM (σ δ μ α : Type) :=
|
||||
String → input σ δ μ → Result σ δ μ α
|
||||
structure ParserCache :=
|
||||
(tokenCache : Option TokenCacheEntry := none)
|
||||
|
||||
variables {σ δ μ α β : Type}
|
||||
structure ParserConfig extends FrontendConfig :=
|
||||
(tokens : Trie TokenConfig)
|
||||
|
||||
namespace ParserM
|
||||
structure ParserData :=
|
||||
(stxStack : Array Syntax) (pos : String.Pos) (cache : ParserCache) (errorMsg : Option String)
|
||||
|
||||
protected def default : ParserM σ δ μ α :=
|
||||
λ _ inp,
|
||||
match inp with
|
||||
| ⟨Result.ok _ i st bst, h⟩ := Result.error "" i st none
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
@[inline] def ParserData.hasError (d : ParserData) : Bool :=
|
||||
d.errorMsg != none
|
||||
|
||||
instance : Inhabited (ParserM σ δ μ α) :=
|
||||
⟨ParserM.default⟩
|
||||
@[inline] def ParserData.stackSize (d : ParserData) : Nat :=
|
||||
d.stxStack.size
|
||||
|
||||
@[inline] def run (p : ParserM σ δ μ α) (st : σ) (bst : δ) (s : String) : Result σ δ μ α :=
|
||||
p s (mkInput 0 st bst)
|
||||
def ParserData.restore (d : ParserData) (iniStackSz : Nat) (iniPos : Nat) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, _, cache, _⟩ := ⟨stack.shrink iniStackSz, iniPos, cache, none⟩
|
||||
|
||||
@[inline] def pure (a : α) : ParserM σ δ μ α :=
|
||||
λ _ inp,
|
||||
match inp with
|
||||
| ⟨Result.ok _ it st bst, h⟩ := Result.ok a it st bst
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
def ParserData.setPos (d : ParserData) (pos : Nat) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, _, cache, msg⟩ := ⟨stack, pos, cache, msg⟩
|
||||
|
||||
@[inline] def bind (x : ParserM σ δ μ α) (f : α → ParserM σ δ μ β) : ParserM σ δ μ β :=
|
||||
λ str inp,
|
||||
match x str inp with
|
||||
| Result.ok a i st bst := f a str (mkInput i st bst)
|
||||
| Result.error msg i st etx := Result.error msg i st etx
|
||||
def ParserData.setCache (d : ParserData) (cache : ParserCache) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, _, msg⟩ := ⟨stack, pos, cache, msg⟩
|
||||
|
||||
instance isMonad : Monad (ParserM σ δ μ) :=
|
||||
{pure := @ParserM.pure _ _ _, bind := @ParserM.bind _ _ _}
|
||||
def ParserData.pushSyntax (d : ParserData) (n : Syntax) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, cache, msg⟩ := ⟨stack.push n, pos, cache, msg⟩
|
||||
|
||||
def mkError (r : input σ δ μ) (msg : String) : Result σ δ μ α :=
|
||||
match r with
|
||||
| ⟨Result.ok _ i st _, _⟩ := Result.error msg i st none
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
def ParserData.next (d : ParserData) (s : String) (pos : Nat) : ParserData :=
|
||||
d.setPos (s.next pos)
|
||||
|
||||
@[inline] def orelse (p q : ParserM σ δ μ α) : ParserM σ δ μ α :=
|
||||
λ str inp,
|
||||
match inp with
|
||||
| ⟨Result.ok _ i₁ _ bst₁, _⟩ :=
|
||||
(match p str inp with
|
||||
| err@(Result.error _ i₂ st₂ _) := if i₁ == i₂ then q str (mkInput i₁ st₂ bst₁) else err
|
||||
| other := other)
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
def ParserData.toErrorMsg (d : ParserData) (cfg : ParserConfig) : String :=
|
||||
match d.errorMsg with
|
||||
| none := ""
|
||||
| some msg :=
|
||||
let pos := cfg.fileMap.toPosition d.pos in
|
||||
cfg.filename ++ ":" ++ toString pos.line ++ ":" ++ toString pos.column ++ " " ++ msg
|
||||
|
||||
@[inline] def failure : ParserM σ δ μ α :=
|
||||
λ _ inp, mkError inp "failure"
|
||||
def ParserFn := String → ParserData → ParserData
|
||||
|
||||
instance : Alternative (ParserM σ δ μ) :=
|
||||
{ orelse := @ParserM.orelse _ _ _,
|
||||
failure := @ParserM.failure _ _ _,
|
||||
.. ParserM.isMonad }
|
||||
instance : Inhabited ParserFn :=
|
||||
⟨λ s, id⟩
|
||||
|
||||
@[inline] def currPos : input σ δ μ → Pos
|
||||
| ⟨Result.ok _ i _ _, _⟩ := i
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
structure ParserInfo :=
|
||||
(updateTokens : Trie TokenConfig → Trie TokenConfig := λ tks, tks)
|
||||
(firstToken : Option TokenConfig := none)
|
||||
|
||||
@[inline] def try {α : Type} (p : ParserM σ δ μ α) : ParserM σ δ μ α :=
|
||||
λ str inp,
|
||||
match inp with
|
||||
| ⟨Result.ok _ i _ _, _⟩ := (match p str inp with
|
||||
| Result.error msg _ st x := Result.error msg i st x
|
||||
| other := other)
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
@[inline] def andthenFn (p q : ParserFn) : ParserFn
|
||||
| s d :=
|
||||
let d := p s d in
|
||||
if d.hasError then d else q s d
|
||||
|
||||
@[inline] def lookahead (p : ParserM σ δ μ α) : ParserM σ δ μ α :=
|
||||
λ str inp,
|
||||
match inp with
|
||||
| ⟨Result.ok _ i _ bst, _⟩ :=
|
||||
(match p str inp with
|
||||
| Result.ok a _ st _ := Result.ok a i st bst
|
||||
| other := other)
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
@[noinline] def andthenInfo (p q : ParserInfo) : ParserInfo :=
|
||||
{ updateTokens := q.updateTokens ∘ p.updateTokens,
|
||||
firstToken := p.firstToken }
|
||||
|
||||
@[specialize] def satisfy (p : Char → Bool) (errorMsg := "unexpected character") : ParserM σ δ μ Char :=
|
||||
λ str inp,
|
||||
match inp with
|
||||
| ⟨Result.ok _ i st bst, _⟩ :=
|
||||
if str.atEnd i then mkError inp "end of input"
|
||||
else let c := str.get i in
|
||||
if p c then Result.ok c (str.next i) st bst
|
||||
else mkError inp errorMsg
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
def ParserData.mkNode (d : ParserData) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, cache, err⟩ :=
|
||||
if err != none && stack.size == iniStackSz then
|
||||
-- If there is an error but there are no new nodes on the stack, we just return `d`
|
||||
d
|
||||
else
|
||||
let newNode := Syntax.node k (stack.extract iniStackSz stack.size) [] in
|
||||
let stack := stack.shrink iniStackSz in
|
||||
let stack := stack.push newNode in
|
||||
⟨stack, pos, cache, err⟩
|
||||
|
||||
@[inline] def ch (c : Char) : ParserM σ δ μ Char :=
|
||||
satisfy (== c) ("expected " ++ repr c)
|
||||
@[inline] def nodeFn (k : SyntaxNodeKind) (p : ParserFn) : ParserFn
|
||||
| s d :=
|
||||
let iniSz := d.stackSize in
|
||||
let d := p s d in
|
||||
d.mkNode k iniSz
|
||||
|
||||
@[inline] def any : ParserM σ δ μ Char :=
|
||||
satisfy (λ _, true)
|
||||
@[noinline] def nodeInfo (p : ParserInfo) : ParserInfo :=
|
||||
{ updateTokens := p.updateTokens,
|
||||
firstToken := p.firstToken }
|
||||
|
||||
@[specialize] partial def takeUntilAux (p : Char → Bool) : ParserM σ δ μ Unit
|
||||
| str inp :=
|
||||
match inp with
|
||||
| ⟨Result.ok _ i st bst, _⟩ :=
|
||||
if str.atEnd i then inp.val
|
||||
else let c := str.get i in
|
||||
if p c then inp.val
|
||||
else takeUntilAux str (mkInput (str.next i) st bst)
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
@[inline] def orelseFn (p q : ParserFn) : ParserFn
|
||||
| s d :=
|
||||
let iniSz := d.stackSize in
|
||||
let iniPos := d.pos in
|
||||
let d := p s d in
|
||||
if d.hasError && d.pos == iniPos then q s (d.restore iniSz iniPos) else d
|
||||
|
||||
@[inline] def takeUntil (p : Char → Bool) : ParserM σ δ μ Unit :=
|
||||
λ str inp, takeUntilAux p str inp
|
||||
@[noinline] def orelseInfo (p q : ParserInfo) : ParserInfo :=
|
||||
{ updateTokens := q.updateTokens ∘ p.updateTokens,
|
||||
firstToken :=
|
||||
match p.firstToken, q.firstToken with
|
||||
| some tk₁, some tk₂ := if tk₁ == tk₂ then some tk₁ else none
|
||||
| _, _ := none }
|
||||
|
||||
partial def strAux (s : String) (errorMsg : String) (initPos : Pos) : Pos → ParserM σ δ μ Unit
|
||||
| j str inp :=
|
||||
if s.atEnd j then inp.val
|
||||
else match inp with
|
||||
| ⟨Result.ok _ i st bst, _⟩ :=
|
||||
if str.atEnd i then Result.error errorMsg initPos st none
|
||||
else if s.get j == str.get i then strAux (s.next j) str (mkInput (str.next i) st bst)
|
||||
else Result.error errorMsg initPos st none
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
def ParserData.resetPos : ParserData → String.Pos → ParserData
|
||||
| ⟨stack, _, cache, errorMsg⟩ pos := ⟨stack, pos, cache, errorMsg⟩
|
||||
|
||||
@[inline] def str (s : String) : ParserM σ δ μ Unit :=
|
||||
λ str inp,
|
||||
let initPos := currPos inp in
|
||||
strAux s ("expected " ++ repr s) initPos 0 str inp
|
||||
@[inline] def tryFn (p : ParserFn) : ParserFn
|
||||
| s d :=
|
||||
let iniPos := d.pos in
|
||||
let d := p s d in
|
||||
if d.hasError then d.resetPos iniPos else d
|
||||
|
||||
@[specialize] partial def manyAux (a : α) (p : ParserM σ δ μ α) : ParserM σ δ μ α
|
||||
| str inp :=
|
||||
match inp with
|
||||
| ⟨Result.ok _ i₀ _ bst₀, _⟩ :=
|
||||
(match p str inp with
|
||||
| Result.ok _ i st bst := manyAux str (mkInput i st bst)
|
||||
| Result.error _ _ st _ := Result.ok a i₀ st bst₀)
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
@[noinline] def noFirstTokenInfo (info : ParserInfo) : ParserInfo :=
|
||||
{ updateTokens := info.updateTokens,
|
||||
firstToken := none }
|
||||
|
||||
@[inline] def many (p : ParserM σ δ μ Unit) : ParserM σ δ μ Unit :=
|
||||
manyAux () p
|
||||
@[inline] def optionalFn (p : ParserFn) : ParserFn :=
|
||||
λ s d,
|
||||
let iniSz := d.stackSize in
|
||||
let iniPos := d.pos in
|
||||
let d := p s d in
|
||||
let d := if d.hasError then d.restore iniSz iniPos else d in
|
||||
d.mkNode nullKind iniSz
|
||||
|
||||
@[inline] def many1 (p : ParserM σ δ μ Unit) : ParserM σ δ μ Unit :=
|
||||
p *> many p
|
||||
def ParserData.mkError (d : ParserData) (msg : String) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, cache, _⟩ := ⟨stack, pos, cache, some msg⟩
|
||||
|
||||
def pos : ParserM σ δ μ Pos :=
|
||||
λ str inp,
|
||||
match inp with
|
||||
| ⟨Result.ok _ i st bst, _⟩ := Result.ok i i st bst
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
def ParserData.mkEOIError (d : ParserData) : ParserData :=
|
||||
d.mkError "end of input"
|
||||
|
||||
def error {α : Type} (msg : String) : ParserM σ δ μ α :=
|
||||
λ _ inp, mkError inp msg
|
||||
@[specialize] partial def manyAux (p : ParserFn) : String → ParserData → ParserData
|
||||
| s d :=
|
||||
let iniSz := d.stackSize in
|
||||
let iniPos := d.pos in
|
||||
let d := p s d in
|
||||
if d.hasError then d.restore iniSz iniPos
|
||||
else if iniPos == d.pos then d.mkError "invalid 'many' parser combinator application, parser did not consume anything"
|
||||
else manyAux s d
|
||||
|
||||
def errorAt {α : Type} (pos : Pos) (msg : String) : ParserM σ δ μ α :=
|
||||
λ _ inp, match inp with
|
||||
| ⟨Result.ok _ _ st _, _⟩ := Result.error msg pos st none
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
@[inline] def manyFn (p : ParserFn) : ParserFn
|
||||
| s d :=
|
||||
let iniSz := d.stackSize in
|
||||
let d := manyAux p s d in
|
||||
d.mkNode nullKind iniSz
|
||||
|
||||
def hexDigit : ParserM σ δ μ Nat
|
||||
| str inp := match inp with
|
||||
| ⟨Result.ok _ i st bst, _⟩ :=
|
||||
if str.atEnd i then mkError inp "unexpected end of input"
|
||||
else
|
||||
let c := str.get i in
|
||||
let i := str.next i in
|
||||
if c.isDigit then Result.ok (c.toNat - '0'.toNat) i st bst
|
||||
else if 'a' <= c && c <= 'f' then Result.ok (10 + c.toNat - 'a'.toNat) i st bst
|
||||
else if 'A' <= c && c <= 'F' then Result.ok (10 + c.toNat - 'A'.toNat) i st bst
|
||||
else mkError inp "invalid hexadecimal numeral, hexadecimal digit expected"
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
@[specialize] partial def satisfyFn (p : Char → Bool) (errorMsg : String := "unexpected character") : ParserFn
|
||||
| s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
if p c then d.next s i
|
||||
else d.mkError errorMsg
|
||||
|
||||
def quotedChar : ParserM σ δ μ Char :=
|
||||
do p ← pos,
|
||||
c ← any,
|
||||
if c = '\\' then pure '\\'
|
||||
else if c = '\"' then pure '\"'
|
||||
else if c = '\'' then pure '\''
|
||||
else if c = 'n' then pure '\n'
|
||||
else if c = 't' then pure '\t'
|
||||
else if c = 'x' then do {
|
||||
d₁ ← hexDigit,
|
||||
d₂ ← hexDigit,
|
||||
pure $ Char.ofNat (16*d₁ + d₂) }
|
||||
else if c = 'u' then do {
|
||||
d₁ ← hexDigit,
|
||||
d₂ ← hexDigit,
|
||||
d₃ ← hexDigit,
|
||||
d₄ ← hexDigit,
|
||||
pure $ Char.ofNat (16*(16*(16*d₁ + d₂) + d₃) + d₄) }
|
||||
else errorAt p "invalid escape sequence"
|
||||
@[specialize] partial def takeUntilFn (p : Char → Bool) : ParserFn
|
||||
| s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d
|
||||
else
|
||||
let c := s.get i in
|
||||
if p c then d
|
||||
else takeUntilFn s (d.next s i)
|
||||
|
||||
partial def strLitAux : String → ParserM σ δ μ String
|
||||
| out := do
|
||||
c ← any,
|
||||
if c == '\"' then pure out
|
||||
else if c == '\\' then do c ← quotedChar, strLitAux (out.push c)
|
||||
else strLitAux (out.push c)
|
||||
@[specialize] def takeWhileFn (p : Char → Bool) : ParserFn :=
|
||||
takeUntilFn (λ c, !p c)
|
||||
|
||||
def strLit : ParserM σ δ μ String :=
|
||||
satisfy (== '\"') "expected string literal" *> strLitAux ""
|
||||
@[inline] def takeWhile1Fn (p : Char → Bool) (errorMsg : String) : ParserFn :=
|
||||
andthenFn (satisfyFn p errorMsg) (takeWhileFn p)
|
||||
|
||||
partial def finishCommentBlock : Nat → ParserM σ δ μ Unit
|
||||
| nesting str inp :=
|
||||
match inp with
|
||||
| ⟨Result.ok _ i st bst, _⟩ :=
|
||||
if str.atEnd i then mkError inp "end of input"
|
||||
else
|
||||
let c := str.get i in
|
||||
let i := str.next i in
|
||||
partial def finishCommentBlock : Nat → ParserFn
|
||||
| nesting s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
let i := s.next i in
|
||||
if c == '-' then
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
if c == '/' then -- "-/" end of comment
|
||||
if nesting == 1 then d.next s i
|
||||
else finishCommentBlock (nesting-1) s (d.next s i)
|
||||
else
|
||||
finishCommentBlock nesting s (d.next s i)
|
||||
else if c == '/' then
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
if c == '-' then finishCommentBlock (nesting+1) s (d.next s i)
|
||||
else finishCommentBlock nesting s (d.setPos i)
|
||||
else finishCommentBlock nesting s (d.setPos i)
|
||||
|
||||
/- Consume whitespace and comments -/
|
||||
partial def whitespace : ParserFn
|
||||
| s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d
|
||||
else
|
||||
let c := s.get i in
|
||||
if c.isWhitespace then whitespace s (d.next s i)
|
||||
else if c == '-' then
|
||||
let i := s.next i in
|
||||
let c := s.get i in
|
||||
if c == '-' then andthenFn (takeUntilFn (= '\n')) whitespace s (d.next s i)
|
||||
else d
|
||||
else if c == '/' then
|
||||
let i := s.next i in
|
||||
let c := s.get i in
|
||||
if c == '-' then
|
||||
if str.atEnd i then mkError inp "end of input"
|
||||
else
|
||||
let c := str.get i in
|
||||
if c == '/' then -- "-/" end of comment
|
||||
if nesting == 1 then Result.ok () (str.next i) st bst
|
||||
else finishCommentBlock (nesting-1) str (mkInput (str.next i) st bst)
|
||||
else
|
||||
finishCommentBlock nesting str (mkInput i st bst)
|
||||
else if c == '/' then
|
||||
if str.atEnd i then mkError inp "end of input"
|
||||
else
|
||||
let c := str.get i in
|
||||
if c == '-' then finishCommentBlock (nesting+1) str (mkInput (str.next i) st bst)
|
||||
else finishCommentBlock nesting str (mkInput i st bst)
|
||||
else finishCommentBlock nesting str (mkInput i st bst)
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
let i := s.next i in
|
||||
let c := s.get i in
|
||||
if c == '-' then d -- "/--" doc comment is an actual token
|
||||
else andthenFn (finishCommentBlock 1) whitespace s (d.next s i)
|
||||
else d
|
||||
else d
|
||||
|
||||
partial def leanWhitespace : ParserM σ δ μ Unit
|
||||
| str inp :=
|
||||
match inp with
|
||||
| ⟨Result.ok _ i st bst, _⟩ :=
|
||||
if str.atEnd i then inp.val
|
||||
def mkEmptySubstringAt (s : String) (p : Nat) : Substring :=
|
||||
{str := s, startPos := p, stopPos := p }
|
||||
|
||||
private def rawAux (startPos : Nat) (trailingWs : Bool) : ParserFn
|
||||
| s d :=
|
||||
let stopPos := d.pos in
|
||||
let leading := mkEmptySubstringAt s startPos in
|
||||
let val := s.extract startPos stopPos in
|
||||
if trailingWs then
|
||||
let d := whitespace s d in
|
||||
let stopPos' := d.pos in
|
||||
let trailing : Substring := { str := s, startPos := stopPos, stopPos := stopPos' } in
|
||||
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
|
||||
d.pushSyntax atom
|
||||
else
|
||||
let trailing := mkEmptySubstringAt s stopPos in
|
||||
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
|
||||
d.pushSyntax atom
|
||||
|
||||
/-- Match an arbitrary Parser and return the consumed String in a `Syntax.atom`. -/
|
||||
@[inline] def rawFn (p : ParserFn) (trailingWs := false) : ParserFn
|
||||
| s d :=
|
||||
let startPos := d.pos in
|
||||
let d := p s d in
|
||||
if d.hasError then d else rawAux startPos trailingWs s d
|
||||
|
||||
def hexDigitFn : ParserFn
|
||||
| s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
let i := s.next i in
|
||||
if c.isDigit || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') then d.setPos i
|
||||
else d.mkError "invalid hexadecimal numeral, hexadecimal digit expected"
|
||||
|
||||
def quotedCharFn : ParserFn
|
||||
| s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
if c == '\\' || c == '\"' || c == '\'' || c == '\n' || c == '\t' then
|
||||
d.next s i
|
||||
else if c == 'x' then
|
||||
andthenFn hexDigitFn hexDigitFn s (d.next s i)
|
||||
else if c == 'u' then
|
||||
andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) s (d.next s i)
|
||||
else
|
||||
let c := str.get i in
|
||||
if c.isWhitespace then leanWhitespace str (mkInput (str.next i) st bst)
|
||||
else if c == '-' then
|
||||
let i := str.next i in
|
||||
let c := str.get i in
|
||||
if c == '-' then ((takeUntil (= '\n')) *> leanWhitespace) str (mkInput i st bst)
|
||||
else inp.val
|
||||
else if c == '/' then
|
||||
let i := str.next i in
|
||||
let c := str.get i in
|
||||
if c == '-' then
|
||||
let i := str.next i in
|
||||
let c := str.get i in
|
||||
if c == '-' then inp.val -- "/--" doc comment is an actual token
|
||||
else ((finishCommentBlock 1) *> leanWhitespace) str (mkInput i st bst)
|
||||
else inp.val
|
||||
else inp.val
|
||||
| ⟨Result.error _ _ _ _, h⟩ := unreachableError h
|
||||
d.mkError "invalid escape sequence"
|
||||
|
||||
end ParserM
|
||||
def mkStrLitKind : IO SyntaxNodeKind := nextKind `strLit
|
||||
@[init mkStrLitKind] constant strLitKind : SyntaxNodeKind := default _
|
||||
|
||||
class monadParser (σ : outParam Type) (δ : outParam Type) (μ : outParam Type) (m : Type → Type) :=
|
||||
(lift {} {α : Type} : ParserM σ δ μ α → m α)
|
||||
(map {} {α : Type} : (Π β, ParserM σ δ μ β → ParserM σ δ μ β) → m α → m α)
|
||||
/-- Push `(Syntax.node tk <new-atom>)` into syntax stack -/
|
||||
def mkNodeToken (k : SyntaxNodeKind) (startPos : Nat) (s : String) (d : ParserData) : ParserData :=
|
||||
let stopPos := d.pos in
|
||||
let leading := mkEmptySubstringAt s startPos in
|
||||
let val := s.extract startPos stopPos in
|
||||
let d := whitespace s d in
|
||||
let wsStopPos := d.pos in
|
||||
let trailing := { Substring . str := s, startPos := stopPos, stopPos := wsStopPos } in
|
||||
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
|
||||
let tk := Syntax.node k (Array.singleton atom) [] in
|
||||
d.pushSyntax tk
|
||||
|
||||
instance monadParserBase : monadParser σ δ μ (ParserM σ δ μ) :=
|
||||
{ lift := λ α, id,
|
||||
map := λ α f x, f α x }
|
||||
partial def strLitFnAux (startPos : Nat) : ParserFn
|
||||
| s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
let d := d.setPos (s.next i) in
|
||||
if c == '\"' then
|
||||
mkNodeToken strLitKind startPos s d
|
||||
else if c == '\\' then andthenFn quotedCharFn strLitFnAux s d
|
||||
else strLitFnAux s d
|
||||
|
||||
instance monadParserTrans {m n : Type → Type} [HasMonadLift m n] [MonadFunctor m m n n] [monadParser σ δ μ m] : monadParser σ δ μ n :=
|
||||
{ lift := λ α p, monadLift (monadParser.lift p : m α),
|
||||
map := λ α f x, monadMap (λ β x, (monadParser.map @f x : m β)) x }
|
||||
def mkNumberKind : IO SyntaxNodeKind := nextKind `number
|
||||
@[init mkNumberKind] constant numberKind : SyntaxNodeKind := default _
|
||||
|
||||
class monadParserAux (σ : outParam Type) (δ : outParam Type) (μ : outParam Type) (m : Type → Type) :=
|
||||
(map {} {α : Type} : (ParserM σ δ μ α → ParserM σ δ μ α) → m α → m α)
|
||||
def decimalNumberFn (startPos : Nat) : ParserFn
|
||||
| s d :=
|
||||
let d := takeWhileFn (λ c, c.isDigit) s d in
|
||||
let i := d.pos in
|
||||
let c := s.get i in
|
||||
let d :=
|
||||
if c == '.' then
|
||||
let i := s.next i in
|
||||
let c := s.get i in
|
||||
if c.isDigit then
|
||||
takeWhileFn (λ c, c.isDigit) s (d.setPos i)
|
||||
else
|
||||
d
|
||||
else
|
||||
d in
|
||||
mkNodeToken numberKind startPos s d
|
||||
|
||||
instance monadParserAuxBase : monadParserAux σ δ μ (ParserM σ δ μ) :=
|
||||
{ map := λ α, id }
|
||||
def binNumberFn (startPos : Nat) : ParserFn
|
||||
| s d :=
|
||||
let d := takeWhile1Fn (λ c, c == '0' || c == '1') "expected binary number" s d in
|
||||
mkNodeToken numberKind startPos s d
|
||||
|
||||
instance monadParserAuxReader {m : Type → Type} {ρ : Type} [Monad m] [monadParserAux σ δ μ m] : monadParserAux σ δ μ (ReaderT ρ m) :=
|
||||
{ map := λ α f x r, (monadParserAux.map f : m α → m α) (x r) }
|
||||
def octalNumberFn (startPos : Nat) : ParserFn
|
||||
| s d :=
|
||||
let d := takeWhile1Fn (λ c, '0' ≤ c && c ≤ '7') "expected octal number" s d in
|
||||
mkNodeToken numberKind startPos s d
|
||||
|
||||
section
|
||||
variables {m : Type → Type} [monadParser σ δ μ m]
|
||||
def hexNumberFn (startPos : Nat) : ParserFn
|
||||
| s d :=
|
||||
let d := takeWhile1Fn (λ c, ('0' ≤ c && c ≤ '9') || ('a' ≤ c && c ≤ 'f') || ('A' ≤ c && c ≤ 'F')) "expected hexadecimal number" s d in
|
||||
mkNodeToken numberKind startPos s d
|
||||
|
||||
@[inline] def satisfy (p : Char → Bool) : m Char := monadParser.lift (ParserM.satisfy p)
|
||||
def ch (c : Char) : m Char := monadParser.lift (ParserM.ch c)
|
||||
def alpha : m Char := satisfy Char.isAlpha
|
||||
def digit : m Char := satisfy Char.isDigit
|
||||
def upper : m Char := satisfy Char.isUpper
|
||||
def lower : m Char := satisfy Char.isLower
|
||||
def any : m Char := satisfy (λ _, true)
|
||||
def numberFnAux : ParserFn
|
||||
| s d :=
|
||||
let startPos := d.pos in
|
||||
if s.atEnd startPos then d.mkEOIError
|
||||
else
|
||||
let c := s.get startPos in
|
||||
if c == '0' then
|
||||
let i := s.next startPos in
|
||||
let c := s.get i in
|
||||
if c == 'b' || c == 'B' then
|
||||
binNumberFn startPos s (d.next s i)
|
||||
else if c == 'o' || c == 'O' then
|
||||
octalNumberFn startPos s (d.next s i)
|
||||
else if c == 'x' || c == 'X' then
|
||||
hexNumberFn startPos s (d.next s i)
|
||||
else
|
||||
decimalNumberFn startPos s (d.next s i)
|
||||
else if c.isDigit then
|
||||
decimalNumberFn startPos s (d.next s startPos)
|
||||
else
|
||||
d.mkError "expected numeral"
|
||||
|
||||
@[inline] def takeUntil (p : Char → Bool) : m Unit := monadParser.lift (ParserM.takeUntil p)
|
||||
def isIdCont : String → ParserData → Bool
|
||||
| s d :=
|
||||
let i := d.pos in
|
||||
let c := s.get i in
|
||||
if c == '.' then
|
||||
let i := s.next i in
|
||||
if s.atEnd i then
|
||||
false
|
||||
else
|
||||
let c := s.get i in
|
||||
isIdFirst c || isIdBeginEscape c
|
||||
else
|
||||
false
|
||||
|
||||
@[inline] def str (s : String) : m Unit := monadParser.lift (ParserM.str s)
|
||||
private def isToken (idStartPos idStopPos : Nat) (tk : Option TokenConfig) : Bool :=
|
||||
match tk with
|
||||
| none := false
|
||||
| some tk :=
|
||||
-- if a token is both a symbol and a valid identifier (i.e. a keyword),
|
||||
-- we want it to be recognized as a symbol
|
||||
tk.val.bsize ≥ idStopPos - idStopPos
|
||||
|
||||
@[inline] def try (p : m α) : m α :=
|
||||
monadParser.map (λ _ p, ParserM.try p) p
|
||||
|
||||
@[inline] def lookahead (p : m α) : m α :=
|
||||
monadParser.map (λ _ p, ParserM.lookahead p) p
|
||||
def mkTokenAndFixPos (startPos : Nat) (tk : Option TokenConfig) (s : String) (d : ParserData) : ParserData :=
|
||||
match tk with
|
||||
| none :=
|
||||
let d := d.setPos startPos in
|
||||
d.mkError "token expected"
|
||||
| some tk :=
|
||||
let leading := mkEmptySubstringAt s startPos in
|
||||
let val := tk.val in
|
||||
let stopPos := startPos + val.bsize in
|
||||
let d := d.setPos stopPos in
|
||||
let wsStopPos := d.pos in
|
||||
let trailing := { Substring . str := s, startPos := stopPos, stopPos := wsStopPos } in
|
||||
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
|
||||
d.pushSyntax atom
|
||||
|
||||
@[inline] def takeWhile (p : Char → Bool) : m Unit := takeUntil (λ c, !p c)
|
||||
def mkIdResult (startPos : Nat) (tk : Option TokenConfig) (val : Name) (s : String) (d : ParserData) : ParserData :=
|
||||
let stopPos := d.pos in
|
||||
if isToken startPos stopPos tk then
|
||||
mkTokenAndFixPos startPos tk s d
|
||||
else
|
||||
let rawVal : Substring := { str := s, startPos := startPos, stopPos := stopPos } in
|
||||
let d := whitespace s d in
|
||||
let trailingStopPos := d.pos in
|
||||
let leading := mkEmptySubstringAt s startPos in
|
||||
let trailing : Substring := { str := s, startPos := stopPos, stopPos := trailingStopPos } in
|
||||
let info : SourceInfo := {leading := leading, trailing := trailing, pos := startPos} in
|
||||
let atom := Syntax.ident (some info) rawVal val [] [] in
|
||||
d.pushSyntax atom
|
||||
|
||||
@[inline] def whitespace : m Unit := takeWhile Char.isWhitespace
|
||||
partial def identFnAux (startPos : Nat) (tk : Option TokenConfig) : Name → ParserFn
|
||||
| r s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
if isIdBeginEscape c then
|
||||
let startPart := s.next i in
|
||||
let d := takeUntilFn isIdEndEscape s (d.setPos startPart) in
|
||||
let stopPart := d.pos in
|
||||
let d := satisfyFn isIdEndEscape "end of escaped identifier expected" s d in
|
||||
if d.hasError then d
|
||||
else
|
||||
let r := Name.mkString r (s.extract startPart stopPart) in
|
||||
if isIdCont s d then
|
||||
identFnAux r s d
|
||||
else
|
||||
mkIdResult startPos tk r s d
|
||||
else if isIdFirst c then
|
||||
let startPart := i in
|
||||
let d := takeWhileFn isIdRest s (d.next s i) in
|
||||
let stopPart := d.pos in
|
||||
let r := Name.mkString r (s.extract startPart stopPart) in
|
||||
if isIdCont s d then
|
||||
identFnAux r s d
|
||||
else
|
||||
mkIdResult startPart tk r s d
|
||||
else
|
||||
mkTokenAndFixPos startPos tk s d
|
||||
|
||||
@[inline] def strLit : m String := monadParser.lift (ParserM.strLit)
|
||||
structure AbsParser (ρ : Type) :=
|
||||
(info : Thunk ParserInfo := Thunk.mk (λ _, {}))
|
||||
(fn : ρ)
|
||||
|
||||
@[inline] def leanWhitespace : m Unit := monadParser.lift ParserM.leanWhitespace
|
||||
end
|
||||
abbrev Parser := AbsParser ParserFn
|
||||
|
||||
section
|
||||
variables {m : Type → Type} [monadParserAux σ δ μ m]
|
||||
class ParserFnLift (ρ : Type) :=
|
||||
(lift {} : ParserFn → ρ)
|
||||
(map : (ParserFn → ParserFn) → ρ → ρ)
|
||||
(map₂ : (ParserFn → ParserFn → ParserFn) → ρ → ρ → ρ)
|
||||
|
||||
@[inline] def many (p : m Unit) : m Unit := monadParserAux.map ParserM.many p
|
||||
@[inline] def many1 (p : m Unit) : m Unit := monadParserAux.map ParserM.many1 p
|
||||
instance parserLiftInhabited {ρ : Type} [ParserFnLift ρ] : Inhabited ρ :=
|
||||
⟨ParserFnLift.lift (default _)⟩
|
||||
|
||||
end
|
||||
instance idParserLift : ParserFnLift ParserFn :=
|
||||
{ lift := λ p, p,
|
||||
map := λ m p, m p,
|
||||
map₂ := λ m p1 p2, m p1 p2 }
|
||||
|
||||
@[inline]
|
||||
def liftParser {ρ : Type} [ParserFnLift ρ] (info : Thunk ParserInfo) (fn : ParserFn) : AbsParser ρ :=
|
||||
{ info := info, fn := ParserFnLift.lift fn }
|
||||
|
||||
@[inline]
|
||||
def mapParser {ρ : Type} [ParserFnLift ρ] (infoFn : ParserInfo → ParserInfo) (pFn : ParserFn → ParserFn) : AbsParser ρ → AbsParser ρ :=
|
||||
λ p, { info := Thunk.mk (λ _, infoFn p.info.get), fn := ParserFnLift.map pFn p.fn }
|
||||
|
||||
@[inline]
|
||||
def mapParser₂ {ρ : Type} [ParserFnLift ρ] (infoFn : ParserInfo → ParserInfo → ParserInfo) (pFn : ParserFn → ParserFn → ParserFn)
|
||||
: AbsParser ρ → AbsParser ρ → AbsParser ρ :=
|
||||
λ p q, { info := Thunk.mk (λ _, infoFn p.info.get q.info.get), fn := ParserFnLift.map₂ pFn p.fn q.fn }
|
||||
|
||||
def EnvParserFn (α : Type) (ρ : Type) :=
|
||||
α → ρ
|
||||
|
||||
def RecParserFn (α ρ : Type) :=
|
||||
EnvParserFn (α → ρ) ρ
|
||||
|
||||
instance envParserLift (α ρ : Type) [ParserFnLift ρ] : ParserFnLift (EnvParserFn α ρ) :=
|
||||
{ lift := λ p a, ParserFnLift.lift p,
|
||||
map := λ m p a, ParserFnLift.map m (p a),
|
||||
map₂ := λ m p1 p2 a, ParserFnLift.map₂ m (p1 a) (p2 a) }
|
||||
|
||||
instance recParserLift (α ρ : Type) [ParserFnLift ρ] : ParserFnLift (RecParserFn α ρ) :=
|
||||
inferInstanceAs (ParserFnLift (EnvParserFn (α → ρ) ρ))
|
||||
|
||||
namespace RecParserFn
|
||||
variables {α ρ : Type}
|
||||
|
||||
@[inline] def recurse (a : α) : RecParserFn α ρ :=
|
||||
λ p, p a
|
||||
|
||||
@[inline] def run [ParserFnLift ρ] (x : RecParserFn α ρ) (rec : α → RecParserFn α ρ) : ρ :=
|
||||
x (fix (λ f a, rec a f))
|
||||
|
||||
end RecParserFn
|
||||
|
||||
@[inline] def andthen {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ → AbsParser ρ :=
|
||||
mapParser₂ andthenInfo andthenFn
|
||||
|
||||
@[inline] def node {ρ : Type} [ParserFnLift ρ] (k : SyntaxNodeKind) : AbsParser ρ → AbsParser ρ :=
|
||||
mapParser nodeInfo (nodeFn k)
|
||||
|
||||
@[inline] def orelse {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ → AbsParser ρ :=
|
||||
mapParser₂ orelseInfo orelseFn
|
||||
|
||||
@[inline] def try {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ :=
|
||||
mapParser noFirstTokenInfo tryFn
|
||||
|
||||
@[inline] def many {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ :=
|
||||
mapParser noFirstTokenInfo manyFn
|
||||
|
||||
@[inline] def optional {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ :=
|
||||
mapParser noFirstTokenInfo optionalFn
|
||||
|
||||
@[inline] def many1 {ρ : Type} [ParserFnLift ρ] (p : AbsParser ρ) : AbsParser ρ :=
|
||||
andthen p (many p)
|
||||
|
||||
abbrev BasicParserFn : Type := EnvParserFn ParserConfig ParserFn
|
||||
abbrev BasicParser : Type := AbsParser BasicParserFn
|
||||
abbrev CmdParserFn (ρ : Type) : Type := EnvParserFn ρ (RecParserFn Unit ParserFn)
|
||||
abbrev TermParserFn : Type := RecParserFn Nat (CmdParserFn ParserConfig)
|
||||
abbrev TermParser : Type := AbsParser TermParserFn
|
||||
abbrev TrailingTermParser : Type := AbsParser (EnvParserFn Syntax TermParserFn)
|
||||
|
||||
def TokenMap (α : Type) := RBMap Name (List α) Name.quickLt
|
||||
|
||||
structure CommandParserConfig extends ParserConfig :=
|
||||
(leadingTermParsers : TokenMap TermParser)
|
||||
(trailingTermParsers : TokenMap TrailingTermParser)
|
||||
-- local Term parsers (such as from `local notation`) hide previous parsers instead of overloading them
|
||||
(localLeadingTermParsers : TokenMap TermParser := mkRBMap _ _ _)
|
||||
(localTrailingTermParsers : TokenMap TrailingTermParser := mkRBMap _ _ _)
|
||||
|
||||
abbrev CommandParser : Type := AbsParser (CmdParserFn CommandParserConfig)
|
||||
|
||||
@[inline] def Term.parser (rbp : Nat := 0) : TermParser :=
|
||||
{ fn := RecParserFn.recurse rbp }
|
||||
|
||||
@[inline] def Command.parser : CommandParser :=
|
||||
{ fn := λ _, RecParserFn.recurse () }
|
||||
|
||||
@[inline] def basicParser2TermParser (p : BasicParser) : TermParser :=
|
||||
{ info := Thunk.mk (λ _, p.info.get), fn := λ _ cfg _, p.fn cfg }
|
||||
|
||||
instance basic2term : HasCoe BasicParser TermParser :=
|
||||
⟨basicParser2TermParser⟩
|
||||
|
||||
@[inline] def basicParser2CmdParser (p : BasicParser) : CommandParser :=
|
||||
{ info := Thunk.mk (λ _, p.info.get), fn := λ cfg _, p.fn cfg.toParserConfig }
|
||||
|
||||
instance basicmd : HasCoe BasicParser CommandParser :=
|
||||
⟨basicParser2CmdParser⟩
|
||||
|
||||
private def tokenFnAux : BasicParserFn
|
||||
| cfg s d :=
|
||||
let i := d.pos in
|
||||
let c := s.get i in
|
||||
if c == '\"' then
|
||||
strLitFnAux i s (d.next s i)
|
||||
else if c.isDigit then
|
||||
numberFnAux s d
|
||||
else
|
||||
let (_, tk) := cfg.tokens.matchPrefix s i in
|
||||
identFnAux i tk Name.anonymous s d
|
||||
|
||||
private def updateCache (startPos : Nat) (d : ParserData) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, cache, none⟩ :=
|
||||
if stack.size == 0 then d
|
||||
else
|
||||
let tk := stack.back in
|
||||
⟨stack, pos, { tokenCache := some { startPos := startPos, stopPos := pos, token := tk } }, none⟩
|
||||
| other := other
|
||||
|
||||
def tokenFn : BasicParserFn
|
||||
| cfg s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
match d.cache with
|
||||
| { tokenCache := some tkc } :=
|
||||
if tkc.startPos == i then
|
||||
let d := d.pushSyntax tkc.token in
|
||||
d.setPos tkc.stopPos
|
||||
else
|
||||
let d := tokenFnAux cfg s d in
|
||||
updateCache i d
|
||||
| _ :=
|
||||
let d := tokenFnAux cfg s d in
|
||||
updateCache i d
|
||||
|
||||
def symbolFnAux (sym : String) (errorMsg : String) : BasicParserFn
|
||||
| cfg s d :=
|
||||
let d := tokenFn cfg s d in
|
||||
if d.hasError then
|
||||
d.mkError errorMsg
|
||||
else
|
||||
match d.stxStack.back with
|
||||
| Syntax.atom _ sym' := if sym == sym' then d else d.mkError errorMsg
|
||||
| _ := d.mkError errorMsg
|
||||
|
||||
@[inline] def symbolFn (sym : String) : BasicParserFn :=
|
||||
symbolFnAux sym ("expected '" ++ sym ++ "'")
|
||||
|
||||
def symbolInfo (sym : String) (lbp : Nat) : ParserInfo :=
|
||||
{ updateTokens := λ trie, trie.insert sym { val := sym, lbp := lbp },
|
||||
firstToken := some { val := sym, lbp := lbp } }
|
||||
|
||||
@[inline] def symbol (sym : String) (lbp : Nat := 0) : BasicParser :=
|
||||
{ info := Thunk.mk (λ _, symbolInfo sym lbp),
|
||||
fn := symbolFn sym }
|
||||
|
||||
def numberFn : BasicParserFn
|
||||
| cfg s d :=
|
||||
let d := tokenFn cfg s d in
|
||||
if d.hasError || !(d.stxStack.back.isOfKind numberKind) then
|
||||
d.mkError "expected numeral"
|
||||
else
|
||||
d
|
||||
|
||||
@[inline] def number : BasicParser :=
|
||||
{ fn := numberFn }
|
||||
|
||||
def strLitFn : BasicParserFn
|
||||
| cfg s d :=
|
||||
let d := tokenFn cfg s d in
|
||||
if d.hasError || !(d.stxStack.back.isOfKind strLitKind) then
|
||||
d.mkError "expected string literal"
|
||||
else
|
||||
d
|
||||
|
||||
@[inline] def strLit : BasicParser :=
|
||||
{ fn := numberFn }
|
||||
|
||||
def mkFrontendConfig (filename input : String) : FrontendConfig :=
|
||||
{ filename := filename,
|
||||
input := input,
|
||||
fileMap := input.toFileMap }
|
||||
|
||||
def BasicParser.run (p : BasicParser) (input : String) (filename : String := "<input>") : Except String Syntax :=
|
||||
let frontendCfg := mkFrontendConfig filename input in
|
||||
let tokens := p.info.get.updateTokens {} in
|
||||
let cfg : ParserConfig := { tokens := tokens, .. frontendCfg } in
|
||||
let d : ParserData := { stxStack := Array.empty, pos := 0, cache := {}, errorMsg := none } in
|
||||
let d := p.fn cfg input d in
|
||||
if d.hasError then
|
||||
Except.error (d.toErrorMsg cfg)
|
||||
else
|
||||
Except.ok d.stxStack.back
|
||||
|
||||
end Parser
|
||||
end Lean
|
||||
-- end Lean
|
||||
|
|
|
|||
|
|
@ -1,716 +0,0 @@
|
|||
import init.lean.name init.lean.parser.trie init.lean.parser.identifier
|
||||
import syntax filemap
|
||||
|
||||
open Lean
|
||||
export Lean.Parser (Trie)
|
||||
-- namespace Lean
|
||||
namespace Parser
|
||||
|
||||
structure FrontendConfig :=
|
||||
(filename : String)
|
||||
(input : String)
|
||||
(fileMap : FileMap)
|
||||
|
||||
structure TokenConfig :=
|
||||
(val : String)
|
||||
(lbp : Nat := 0)
|
||||
|
||||
namespace TokenConfig
|
||||
|
||||
def beq : TokenConfig → TokenConfig → Bool
|
||||
| ⟨val₁, lbp₁⟩ ⟨val₂, lbp₂⟩ := val₁ == val₂ && lbp₁ == lbp₂
|
||||
|
||||
instance : HasBeq TokenConfig :=
|
||||
⟨beq⟩
|
||||
|
||||
end TokenConfig
|
||||
|
||||
structure TokenCacheEntry :=
|
||||
(startPos stopPos : String.Pos)
|
||||
(token : Syntax)
|
||||
|
||||
structure ParserCache :=
|
||||
(tokenCache : Option TokenCacheEntry := none)
|
||||
|
||||
structure ParserConfig extends FrontendConfig :=
|
||||
(tokens : Trie TokenConfig)
|
||||
|
||||
structure ParserData :=
|
||||
(stxStack : Array Syntax) (pos : String.Pos) (cache : ParserCache) (errorMsg : Option String)
|
||||
|
||||
@[inline] def ParserData.hasError (d : ParserData) : Bool :=
|
||||
d.errorMsg != none
|
||||
|
||||
@[inline] def ParserData.stackSize (d : ParserData) : Nat :=
|
||||
d.stxStack.size
|
||||
|
||||
def ParserData.restore (d : ParserData) (iniStackSz : Nat) (iniPos : Nat) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, _, cache, _⟩ := ⟨stack.shrink iniStackSz, iniPos, cache, none⟩
|
||||
|
||||
def ParserData.setPos (d : ParserData) (pos : Nat) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, _, cache, msg⟩ := ⟨stack, pos, cache, msg⟩
|
||||
|
||||
def ParserData.setCache (d : ParserData) (cache : ParserCache) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, _, msg⟩ := ⟨stack, pos, cache, msg⟩
|
||||
|
||||
def ParserData.pushSyntax (d : ParserData) (n : Syntax) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, cache, msg⟩ := ⟨stack.push n, pos, cache, msg⟩
|
||||
|
||||
def ParserData.next (d : ParserData) (s : String) (pos : Nat) : ParserData :=
|
||||
d.setPos (s.next pos)
|
||||
|
||||
def ParserData.toErrorMsg (d : ParserData) (cfg : ParserConfig) : String :=
|
||||
match d.errorMsg with
|
||||
| none := ""
|
||||
| some msg :=
|
||||
let pos := cfg.fileMap.toPosition d.pos in
|
||||
cfg.filename ++ ":" ++ toString pos.line ++ ":" ++ toString pos.column ++ " " ++ msg
|
||||
|
||||
def ParserFn := String → ParserData → ParserData
|
||||
|
||||
instance : Inhabited ParserFn :=
|
||||
⟨λ s, id⟩
|
||||
|
||||
structure ParserInfo :=
|
||||
(updateTokens : Trie TokenConfig → Trie TokenConfig := λ tks, tks)
|
||||
(firstToken : Option TokenConfig := none)
|
||||
|
||||
@[inline] def andthenFn (p q : ParserFn) : ParserFn
|
||||
| s d :=
|
||||
let d := p s d in
|
||||
if d.hasError then d else q s d
|
||||
|
||||
@[noinline] def andthenInfo (p q : ParserInfo) : ParserInfo :=
|
||||
{ updateTokens := q.updateTokens ∘ p.updateTokens,
|
||||
firstToken := p.firstToken }
|
||||
|
||||
def ParserData.mkNode (d : ParserData) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, cache, err⟩ :=
|
||||
if err != none && stack.size == iniStackSz then
|
||||
-- If there is an error but there are no new nodes on the stack, we just return `d`
|
||||
d
|
||||
else
|
||||
let newNode := Syntax.node k (stack.extract iniStackSz stack.size) [] in
|
||||
let stack := stack.shrink iniStackSz in
|
||||
let stack := stack.push newNode in
|
||||
⟨stack, pos, cache, err⟩
|
||||
|
||||
@[inline] def nodeFn (k : SyntaxNodeKind) (p : ParserFn) : ParserFn
|
||||
| s d :=
|
||||
let iniSz := d.stackSize in
|
||||
let d := p s d in
|
||||
d.mkNode k iniSz
|
||||
|
||||
@[noinline] def nodeInfo (p : ParserInfo) : ParserInfo :=
|
||||
{ updateTokens := p.updateTokens,
|
||||
firstToken := p.firstToken }
|
||||
|
||||
@[inline] def orelseFn (p q : ParserFn) : ParserFn
|
||||
| s d :=
|
||||
let iniSz := d.stackSize in
|
||||
let iniPos := d.pos in
|
||||
let d := p s d in
|
||||
if d.hasError && d.pos == iniPos then q s (d.restore iniSz iniPos) else d
|
||||
|
||||
@[noinline] def orelseInfo (p q : ParserInfo) : ParserInfo :=
|
||||
{ updateTokens := q.updateTokens ∘ p.updateTokens,
|
||||
firstToken :=
|
||||
match p.firstToken, q.firstToken with
|
||||
| some tk₁, some tk₂ := if tk₁ == tk₂ then some tk₁ else none
|
||||
| _, _ := none }
|
||||
|
||||
def ParserData.resetPos : ParserData → String.Pos → ParserData
|
||||
| ⟨stack, _, cache, errorMsg⟩ pos := ⟨stack, pos, cache, errorMsg⟩
|
||||
|
||||
@[inline] def tryFn (p : ParserFn) : ParserFn
|
||||
| s d :=
|
||||
let iniPos := d.pos in
|
||||
let d := p s d in
|
||||
if d.hasError then d.resetPos iniPos else d
|
||||
|
||||
@[noinline] def noFirstTokenInfo (info : ParserInfo) : ParserInfo :=
|
||||
{ updateTokens := info.updateTokens,
|
||||
firstToken := none }
|
||||
|
||||
@[inline] def optionalFn (p : ParserFn) : ParserFn :=
|
||||
λ s d,
|
||||
let iniSz := d.stackSize in
|
||||
let iniPos := d.pos in
|
||||
let d := p s d in
|
||||
let d := if d.hasError then d.restore iniSz iniPos else d in
|
||||
d.mkNode nullKind iniSz
|
||||
|
||||
def ParserData.mkError (d : ParserData) (msg : String) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, cache, _⟩ := ⟨stack, pos, cache, some msg⟩
|
||||
|
||||
def ParserData.mkEOIError (d : ParserData) : ParserData :=
|
||||
d.mkError "end of input"
|
||||
|
||||
@[specialize] partial def manyAux (p : ParserFn) : String → ParserData → ParserData
|
||||
| s d :=
|
||||
let iniSz := d.stackSize in
|
||||
let iniPos := d.pos in
|
||||
let d := p s d in
|
||||
if d.hasError then d.restore iniSz iniPos
|
||||
else if iniPos == d.pos then d.mkError "invalid 'many' parser combinator application, parser did not consume anything"
|
||||
else manyAux s d
|
||||
|
||||
@[inline] def manyFn (p : ParserFn) : ParserFn
|
||||
| s d :=
|
||||
let iniSz := d.stackSize in
|
||||
let d := manyAux p s d in
|
||||
d.mkNode nullKind iniSz
|
||||
|
||||
@[specialize] partial def satisfyFn (p : Char → Bool) (errorMsg : String := "unexpected character") : ParserFn
|
||||
| s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
if p c then d.next s i
|
||||
else d.mkError errorMsg
|
||||
|
||||
@[specialize] partial def takeUntilFn (p : Char → Bool) : ParserFn
|
||||
| s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d
|
||||
else
|
||||
let c := s.get i in
|
||||
if p c then d
|
||||
else takeUntilFn s (d.next s i)
|
||||
|
||||
@[specialize] def takeWhileFn (p : Char → Bool) : ParserFn :=
|
||||
takeUntilFn (λ c, !p c)
|
||||
|
||||
@[inline] def takeWhile1Fn (p : Char → Bool) (errorMsg : String) : ParserFn :=
|
||||
andthenFn (satisfyFn p errorMsg) (takeWhileFn p)
|
||||
|
||||
partial def finishCommentBlock : Nat → ParserFn
|
||||
| nesting s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
let i := s.next i in
|
||||
if c == '-' then
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
if c == '/' then -- "-/" end of comment
|
||||
if nesting == 1 then d.next s i
|
||||
else finishCommentBlock (nesting-1) s (d.next s i)
|
||||
else
|
||||
finishCommentBlock nesting s (d.next s i)
|
||||
else if c == '/' then
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
if c == '-' then finishCommentBlock (nesting+1) s (d.next s i)
|
||||
else finishCommentBlock nesting s (d.setPos i)
|
||||
else finishCommentBlock nesting s (d.setPos i)
|
||||
|
||||
/- Consume whitespace and comments -/
|
||||
partial def whitespace : ParserFn
|
||||
| s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d
|
||||
else
|
||||
let c := s.get i in
|
||||
if c.isWhitespace then whitespace s (d.next s i)
|
||||
else if c == '-' then
|
||||
let i := s.next i in
|
||||
let c := s.get i in
|
||||
if c == '-' then andthenFn (takeUntilFn (= '\n')) whitespace s (d.next s i)
|
||||
else d
|
||||
else if c == '/' then
|
||||
let i := s.next i in
|
||||
let c := s.get i in
|
||||
if c == '-' then
|
||||
let i := s.next i in
|
||||
let c := s.get i in
|
||||
if c == '-' then d -- "/--" doc comment is an actual token
|
||||
else andthenFn (finishCommentBlock 1) whitespace s (d.next s i)
|
||||
else d
|
||||
else d
|
||||
|
||||
def mkEmptySubstringAt (s : String) (p : Nat) : Substring :=
|
||||
{str := s, startPos := p, stopPos := p }
|
||||
|
||||
private def rawAux (startPos : Nat) (trailingWs : Bool) : ParserFn
|
||||
| s d :=
|
||||
let stopPos := d.pos in
|
||||
let leading := mkEmptySubstringAt s startPos in
|
||||
let val := s.extract startPos stopPos in
|
||||
if trailingWs then
|
||||
let d := whitespace s d in
|
||||
let stopPos' := d.pos in
|
||||
let trailing : Substring := { str := s, startPos := stopPos, stopPos := stopPos' } in
|
||||
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
|
||||
d.pushSyntax atom
|
||||
else
|
||||
let trailing := mkEmptySubstringAt s stopPos in
|
||||
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
|
||||
d.pushSyntax atom
|
||||
|
||||
/-- Match an arbitrary Parser and return the consumed String in a `Syntax.atom`. -/
|
||||
@[inline] def rawFn (p : ParserFn) (trailingWs := false) : ParserFn
|
||||
| s d :=
|
||||
let startPos := d.pos in
|
||||
let d := p s d in
|
||||
if d.hasError then d else rawAux startPos trailingWs s d
|
||||
|
||||
def hexDigitFn : ParserFn
|
||||
| s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
let i := s.next i in
|
||||
if c.isDigit || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') then d.setPos i
|
||||
else d.mkError "invalid hexadecimal numeral, hexadecimal digit expected"
|
||||
|
||||
def quotedCharFn : ParserFn
|
||||
| s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
if c == '\\' || c == '\"' || c == '\'' || c == '\n' || c == '\t' then
|
||||
d.next s i
|
||||
else if c == 'x' then
|
||||
andthenFn hexDigitFn hexDigitFn s (d.next s i)
|
||||
else if c == 'u' then
|
||||
andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) s (d.next s i)
|
||||
else
|
||||
d.mkError "invalid escape sequence"
|
||||
|
||||
def mkStrLitKind : IO SyntaxNodeKind := nextKind `strLit
|
||||
@[init mkStrLitKind] constant strLitKind : SyntaxNodeKind := default _
|
||||
|
||||
/-- Push `(Syntax.node tk <new-atom>)` into syntax stack -/
|
||||
def mkNodeToken (k : SyntaxNodeKind) (startPos : Nat) (s : String) (d : ParserData) : ParserData :=
|
||||
let stopPos := d.pos in
|
||||
let leading := mkEmptySubstringAt s startPos in
|
||||
let val := s.extract startPos stopPos in
|
||||
let d := whitespace s d in
|
||||
let wsStopPos := d.pos in
|
||||
let trailing := { Substring . str := s, startPos := stopPos, stopPos := wsStopPos } in
|
||||
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
|
||||
let tk := Syntax.node k (Array.singleton atom) [] in
|
||||
d.pushSyntax tk
|
||||
|
||||
partial def strLitFnAux (startPos : Nat) : ParserFn
|
||||
| s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
let d := d.setPos (s.next i) in
|
||||
if c == '\"' then
|
||||
mkNodeToken strLitKind startPos s d
|
||||
else if c == '\\' then andthenFn quotedCharFn strLitFnAux s d
|
||||
else strLitFnAux s d
|
||||
|
||||
def mkNumberKind : IO SyntaxNodeKind := nextKind `number
|
||||
@[init mkNumberKind] constant numberKind : SyntaxNodeKind := default _
|
||||
|
||||
def decimalNumberFn (startPos : Nat) : ParserFn
|
||||
| s d :=
|
||||
let d := takeWhileFn (λ c, c.isDigit) s d in
|
||||
let i := d.pos in
|
||||
let c := s.get i in
|
||||
let d :=
|
||||
if c == '.' then
|
||||
let i := s.next i in
|
||||
let c := s.get i in
|
||||
if c.isDigit then
|
||||
takeWhileFn (λ c, c.isDigit) s (d.setPos i)
|
||||
else
|
||||
d
|
||||
else
|
||||
d in
|
||||
mkNodeToken numberKind startPos s d
|
||||
|
||||
def binNumberFn (startPos : Nat) : ParserFn
|
||||
| s d :=
|
||||
let d := takeWhile1Fn (λ c, c == '0' || c == '1') "expected binary number" s d in
|
||||
mkNodeToken numberKind startPos s d
|
||||
|
||||
def octalNumberFn (startPos : Nat) : ParserFn
|
||||
| s d :=
|
||||
let d := takeWhile1Fn (λ c, '0' ≤ c && c ≤ '7') "expected octal number" s d in
|
||||
mkNodeToken numberKind startPos s d
|
||||
|
||||
def hexNumberFn (startPos : Nat) : ParserFn
|
||||
| s d :=
|
||||
let d := takeWhile1Fn (λ c, ('0' ≤ c && c ≤ '9') || ('a' ≤ c && c ≤ 'f') || ('A' ≤ c && c ≤ 'F')) "expected hexadecimal number" s d in
|
||||
mkNodeToken numberKind startPos s d
|
||||
|
||||
def numberFnAux : ParserFn
|
||||
| s d :=
|
||||
let startPos := d.pos in
|
||||
if s.atEnd startPos then d.mkEOIError
|
||||
else
|
||||
let c := s.get startPos in
|
||||
if c == '0' then
|
||||
let i := s.next startPos in
|
||||
let c := s.get i in
|
||||
if c == 'b' || c == 'B' then
|
||||
binNumberFn startPos s (d.next s i)
|
||||
else if c == 'o' || c == 'O' then
|
||||
octalNumberFn startPos s (d.next s i)
|
||||
else if c == 'x' || c == 'X' then
|
||||
hexNumberFn startPos s (d.next s i)
|
||||
else
|
||||
decimalNumberFn startPos s (d.next s i)
|
||||
else if c.isDigit then
|
||||
decimalNumberFn startPos s (d.next s startPos)
|
||||
else
|
||||
d.mkError "expected numeral"
|
||||
|
||||
def isIdCont : String → ParserData → Bool
|
||||
| s d :=
|
||||
let i := d.pos in
|
||||
let c := s.get i in
|
||||
if c == '.' then
|
||||
let i := s.next i in
|
||||
if s.atEnd i then
|
||||
false
|
||||
else
|
||||
let c := s.get i in
|
||||
isIdFirst c || isIdBeginEscape c
|
||||
else
|
||||
false
|
||||
|
||||
private def isToken (idStartPos idStopPos : Nat) (tk : Option TokenConfig) : Bool :=
|
||||
match tk with
|
||||
| none := false
|
||||
| some tk :=
|
||||
-- if a token is both a symbol and a valid identifier (i.e. a keyword),
|
||||
-- we want it to be recognized as a symbol
|
||||
tk.val.bsize ≥ idStopPos - idStopPos
|
||||
|
||||
|
||||
def mkTokenAndFixPos (startPos : Nat) (tk : Option TokenConfig) (s : String) (d : ParserData) : ParserData :=
|
||||
match tk with
|
||||
| none :=
|
||||
let d := d.setPos startPos in
|
||||
d.mkError "token expected"
|
||||
| some tk :=
|
||||
let leading := mkEmptySubstringAt s startPos in
|
||||
let val := tk.val in
|
||||
let stopPos := startPos + val.bsize in
|
||||
let d := d.setPos stopPos in
|
||||
let wsStopPos := d.pos in
|
||||
let trailing := { Substring . str := s, startPos := stopPos, stopPos := wsStopPos } in
|
||||
let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in
|
||||
d.pushSyntax atom
|
||||
|
||||
def mkIdResult (startPos : Nat) (tk : Option TokenConfig) (val : Name) (s : String) (d : ParserData) : ParserData :=
|
||||
let stopPos := d.pos in
|
||||
if isToken startPos stopPos tk then
|
||||
mkTokenAndFixPos startPos tk s d
|
||||
else
|
||||
let rawVal : Substring := { str := s, startPos := startPos, stopPos := stopPos } in
|
||||
let d := whitespace s d in
|
||||
let trailingStopPos := d.pos in
|
||||
let leading := mkEmptySubstringAt s startPos in
|
||||
let trailing : Substring := { str := s, startPos := stopPos, stopPos := trailingStopPos } in
|
||||
let info : SourceInfo := {leading := leading, trailing := trailing, pos := startPos} in
|
||||
let atom := Syntax.ident (some info) rawVal val [] [] in
|
||||
d.pushSyntax atom
|
||||
|
||||
partial def identFnAux (startPos : Nat) (tk : Option TokenConfig) : Name → ParserFn
|
||||
| r s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
let c := s.get i in
|
||||
if isIdBeginEscape c then
|
||||
let startPart := s.next i in
|
||||
let d := takeUntilFn isIdEndEscape s (d.setPos startPart) in
|
||||
let stopPart := d.pos in
|
||||
let d := satisfyFn isIdEndEscape "end of escaped identifier expected" s d in
|
||||
if d.hasError then d
|
||||
else
|
||||
let r := Name.mkString r (s.extract startPart stopPart) in
|
||||
if isIdCont s d then
|
||||
identFnAux r s d
|
||||
else
|
||||
mkIdResult startPos tk r s d
|
||||
else if isIdFirst c then
|
||||
let startPart := i in
|
||||
let d := takeWhileFn isIdRest s (d.next s i) in
|
||||
let stopPart := d.pos in
|
||||
let r := Name.mkString r (s.extract startPart stopPart) in
|
||||
if isIdCont s d then
|
||||
identFnAux r s d
|
||||
else
|
||||
mkIdResult startPart tk r s d
|
||||
else
|
||||
mkTokenAndFixPos startPos tk s d
|
||||
|
||||
structure AbsParser (ρ : Type) :=
|
||||
(info : Thunk ParserInfo := Thunk.mk (λ _, {}))
|
||||
(fn : ρ)
|
||||
|
||||
abbrev Parser := AbsParser ParserFn
|
||||
|
||||
class ParserFnLift (ρ : Type) :=
|
||||
(lift {} : ParserFn → ρ)
|
||||
(map : (ParserFn → ParserFn) → ρ → ρ)
|
||||
(map₂ : (ParserFn → ParserFn → ParserFn) → ρ → ρ → ρ)
|
||||
|
||||
instance parserLiftInhabited {ρ : Type} [ParserFnLift ρ] : Inhabited ρ :=
|
||||
⟨ParserFnLift.lift (default _)⟩
|
||||
|
||||
instance idParserLift : ParserFnLift ParserFn :=
|
||||
{ lift := λ p, p,
|
||||
map := λ m p, m p,
|
||||
map₂ := λ m p1 p2, m p1 p2 }
|
||||
|
||||
@[inline]
|
||||
def liftParser {ρ : Type} [ParserFnLift ρ] (info : Thunk ParserInfo) (fn : ParserFn) : AbsParser ρ :=
|
||||
{ info := info, fn := ParserFnLift.lift fn }
|
||||
|
||||
@[inline]
|
||||
def mapParser {ρ : Type} [ParserFnLift ρ] (infoFn : ParserInfo → ParserInfo) (pFn : ParserFn → ParserFn) : AbsParser ρ → AbsParser ρ :=
|
||||
λ p, { info := Thunk.mk (λ _, infoFn p.info.get), fn := ParserFnLift.map pFn p.fn }
|
||||
|
||||
@[inline]
|
||||
def mapParser₂ {ρ : Type} [ParserFnLift ρ] (infoFn : ParserInfo → ParserInfo → ParserInfo) (pFn : ParserFn → ParserFn → ParserFn)
|
||||
: AbsParser ρ → AbsParser ρ → AbsParser ρ :=
|
||||
λ p q, { info := Thunk.mk (λ _, infoFn p.info.get q.info.get), fn := ParserFnLift.map₂ pFn p.fn q.fn }
|
||||
|
||||
def EnvParserFn (α : Type) (ρ : Type) :=
|
||||
α → ρ
|
||||
|
||||
def RecParserFn (α ρ : Type) :=
|
||||
EnvParserFn (α → ρ) ρ
|
||||
|
||||
instance envParserLift (α ρ : Type) [ParserFnLift ρ] : ParserFnLift (EnvParserFn α ρ) :=
|
||||
{ lift := λ p a, ParserFnLift.lift p,
|
||||
map := λ m p a, ParserFnLift.map m (p a),
|
||||
map₂ := λ m p1 p2 a, ParserFnLift.map₂ m (p1 a) (p2 a) }
|
||||
|
||||
instance recParserLift (α ρ : Type) [ParserFnLift ρ] : ParserFnLift (RecParserFn α ρ) :=
|
||||
inferInstanceAs (ParserFnLift (EnvParserFn (α → ρ) ρ))
|
||||
|
||||
namespace RecParserFn
|
||||
variables {α ρ : Type}
|
||||
|
||||
@[inline] def recurse (a : α) : RecParserFn α ρ :=
|
||||
λ p, p a
|
||||
|
||||
@[inline] def run [ParserFnLift ρ] (x : RecParserFn α ρ) (rec : α → RecParserFn α ρ) : ρ :=
|
||||
x (fix (λ f a, rec a f))
|
||||
|
||||
end RecParserFn
|
||||
|
||||
@[inline] def andthen {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ → AbsParser ρ :=
|
||||
mapParser₂ andthenInfo andthenFn
|
||||
|
||||
@[inline] def node {ρ : Type} [ParserFnLift ρ] (k : SyntaxNodeKind) : AbsParser ρ → AbsParser ρ :=
|
||||
mapParser nodeInfo (nodeFn k)
|
||||
|
||||
@[inline] def orelse {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ → AbsParser ρ :=
|
||||
mapParser₂ orelseInfo orelseFn
|
||||
|
||||
@[inline] def try {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ :=
|
||||
mapParser noFirstTokenInfo tryFn
|
||||
|
||||
@[inline] def many {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ :=
|
||||
mapParser noFirstTokenInfo manyFn
|
||||
|
||||
@[inline] def optional {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ :=
|
||||
mapParser noFirstTokenInfo optionalFn
|
||||
|
||||
@[inline] def many1 {ρ : Type} [ParserFnLift ρ] (p : AbsParser ρ) : AbsParser ρ :=
|
||||
andthen p (many p)
|
||||
|
||||
abbrev BasicParserFn : Type := EnvParserFn ParserConfig ParserFn
|
||||
abbrev BasicParser : Type := AbsParser BasicParserFn
|
||||
abbrev CmdParserFn (ρ : Type) : Type := EnvParserFn ρ (RecParserFn Unit ParserFn)
|
||||
abbrev TermParserFn : Type := RecParserFn Nat (CmdParserFn ParserConfig)
|
||||
abbrev TermParser : Type := AbsParser TermParserFn
|
||||
abbrev TrailingTermParser : Type := AbsParser (EnvParserFn Syntax TermParserFn)
|
||||
|
||||
def TokenMap (α : Type) := RBMap Name (List α) Name.quickLt
|
||||
|
||||
structure CommandParserConfig extends ParserConfig :=
|
||||
(leadingTermParsers : TokenMap TermParser)
|
||||
(trailingTermParsers : TokenMap TrailingTermParser)
|
||||
-- local Term parsers (such as from `local notation`) hide previous parsers instead of overloading them
|
||||
(localLeadingTermParsers : TokenMap TermParser := mkRBMap _ _ _)
|
||||
(localTrailingTermParsers : TokenMap TrailingTermParser := mkRBMap _ _ _)
|
||||
|
||||
abbrev CommandParser : Type := AbsParser (CmdParserFn CommandParserConfig)
|
||||
|
||||
@[inline] def Term.parser (rbp : Nat := 0) : TermParser :=
|
||||
{ fn := RecParserFn.recurse rbp }
|
||||
|
||||
@[inline] def Command.parser : CommandParser :=
|
||||
{ fn := λ _, RecParserFn.recurse () }
|
||||
|
||||
@[inline] def basicParser2TermParser (p : BasicParser) : TermParser :=
|
||||
{ info := Thunk.mk (λ _, p.info.get), fn := λ _ cfg _, p.fn cfg }
|
||||
|
||||
instance basic2term : HasCoe BasicParser TermParser :=
|
||||
⟨basicParser2TermParser⟩
|
||||
|
||||
@[inline] def basicParser2CmdParser (p : BasicParser) : CommandParser :=
|
||||
{ info := Thunk.mk (λ _, p.info.get), fn := λ cfg _, p.fn cfg.toParserConfig }
|
||||
|
||||
instance basicmd : HasCoe BasicParser CommandParser :=
|
||||
⟨basicParser2CmdParser⟩
|
||||
|
||||
private def tokenFnAux : BasicParserFn
|
||||
| cfg s d :=
|
||||
let i := d.pos in
|
||||
let c := s.get i in
|
||||
if c == '\"' then
|
||||
strLitFnAux i s (d.next s i)
|
||||
else if c.isDigit then
|
||||
numberFnAux s d
|
||||
else
|
||||
let (_, tk) := cfg.tokens.matchPrefix s i in
|
||||
identFnAux i tk Name.anonymous s d
|
||||
|
||||
private def updateCache (startPos : Nat) (d : ParserData) : ParserData :=
|
||||
match d with
|
||||
| ⟨stack, pos, cache, none⟩ :=
|
||||
if stack.size == 0 then d
|
||||
else
|
||||
let tk := stack.back in
|
||||
⟨stack, pos, { tokenCache := some { startPos := startPos, stopPos := pos, token := tk } }, none⟩
|
||||
| other := other
|
||||
|
||||
def tokenFn : BasicParserFn
|
||||
| cfg s d :=
|
||||
let i := d.pos in
|
||||
if s.atEnd i then d.mkEOIError
|
||||
else
|
||||
match d.cache with
|
||||
| { tokenCache := some tkc } :=
|
||||
if tkc.startPos == i then
|
||||
let d := d.pushSyntax tkc.token in
|
||||
d.setPos tkc.stopPos
|
||||
else
|
||||
let d := tokenFnAux cfg s d in
|
||||
updateCache i d
|
||||
| _ :=
|
||||
let d := tokenFnAux cfg s d in
|
||||
updateCache i d
|
||||
|
||||
def symbolFnAux (sym : String) (errorMsg : String) : BasicParserFn
|
||||
| cfg s d :=
|
||||
let d := tokenFn cfg s d in
|
||||
if d.hasError then
|
||||
d.mkError errorMsg
|
||||
else
|
||||
match d.stxStack.back with
|
||||
| Syntax.atom _ sym' := if sym == sym' then d else d.mkError errorMsg
|
||||
| _ := d.mkError errorMsg
|
||||
|
||||
@[inline] def symbolFn (sym : String) : BasicParserFn :=
|
||||
symbolFnAux sym ("expected '" ++ sym ++ "'")
|
||||
|
||||
def symbolInfo (sym : String) (lbp : Nat) : ParserInfo :=
|
||||
{ updateTokens := λ trie, trie.insert sym { val := sym, lbp := lbp },
|
||||
firstToken := some { val := sym, lbp := lbp } }
|
||||
|
||||
@[inline] def symbol (sym : String) (lbp : Nat := 0) : BasicParser :=
|
||||
{ info := Thunk.mk (λ _, symbolInfo sym lbp),
|
||||
fn := symbolFn sym }
|
||||
|
||||
def numberFn : BasicParserFn
|
||||
| cfg s d :=
|
||||
let d := tokenFn cfg s d in
|
||||
if d.hasError || !(d.stxStack.back.isOfKind numberKind) then
|
||||
d.mkError "expected numeral"
|
||||
else
|
||||
d
|
||||
|
||||
@[inline] def number : BasicParser :=
|
||||
{ fn := numberFn }
|
||||
|
||||
def strLitFn : BasicParserFn
|
||||
| cfg s d :=
|
||||
let d := tokenFn cfg s d in
|
||||
if d.hasError || !(d.stxStack.back.isOfKind strLitKind) then
|
||||
d.mkError "expected string literal"
|
||||
else
|
||||
d
|
||||
|
||||
@[inline] def strLit : BasicParser :=
|
||||
{ fn := numberFn }
|
||||
|
||||
def mkFrontendConfig (filename input : String) : FrontendConfig :=
|
||||
{ filename := filename,
|
||||
input := input,
|
||||
fileMap := input.toFileMap }
|
||||
|
||||
def BasicParser.run (p : BasicParser) (input : String) (filename : String := "<input>") : Except String Syntax :=
|
||||
let frontendCfg := mkFrontendConfig filename input in
|
||||
let tokens := p.info.get.updateTokens {} in
|
||||
let cfg : ParserConfig := { tokens := tokens, .. frontendCfg } in
|
||||
let d : ParserData := { stxStack := Array.empty, pos := 0, cache := {}, errorMsg := none } in
|
||||
let d := p.fn cfg input d in
|
||||
if d.hasError then
|
||||
Except.error (d.toErrorMsg cfg)
|
||||
else
|
||||
Except.ok d.stxStack.back
|
||||
|
||||
|
||||
local infix ` ; `:10 := Parser.andthen
|
||||
local infix ` || `:5 := Parser.orelse
|
||||
|
||||
def mkTestKind : IO SyntaxNodeKind := nextKind `test
|
||||
@[init mkTestKind] constant testKind : SyntaxNodeKind := default _
|
||||
|
||||
set_option pp.implicit true
|
||||
set_option pp.binder_types false
|
||||
set_option trace.compiler.stage2 true
|
||||
set_option trace.compiler.boxed true
|
||||
-- set_option trace.compiler.stage1 true
|
||||
-- set_option trace.compiler.lcnf true
|
||||
-- set_option trace.compiler.lcnf true
|
||||
-- set_option trace.compiler.simp true
|
||||
|
||||
@[inline2]
|
||||
def p0 : BasicParser :=
|
||||
node testKind (symbol "foo"; many (symbol "boo"))
|
||||
|
||||
@[inline2]
|
||||
def p1 (s : String) : TermParser :=
|
||||
symbol "hello"; symbol "world"; symbol "boo"
|
||||
||
|
||||
symbol s
|
||||
||
|
||||
symbol "opt3"; symbol "boo"
|
||||
|
||||
@[inline2]
|
||||
def p2 (s : String) : TermParser :=
|
||||
-- symbol "boo"; p1; p1; p0
|
||||
p1 "hello"; p0; p1 s
|
||||
|
||||
@[inline2]
|
||||
def p3 (s : String) : TermParser :=
|
||||
p1 s
|
||||
||
|
||||
p2 s
|
||||
||
|
||||
symbol "boo"; p2 s
|
||||
|
||||
@[inline2]
|
||||
def p4 (s : String) : CommandParser :=
|
||||
symbol s; symbol "boo"
|
||||
|
||||
end Parser
|
||||
-- end Lean
|
||||
|
|
@ -236,6 +236,25 @@ partial def reprint : Syntax → Option String
|
|||
else args.mfoldl "" (λ stx r, do s ← reprint stx, pure $ r ++ s)
|
||||
| missing := ""
|
||||
|
||||
open Lean.Format
|
||||
|
||||
protected partial def toFormat : Syntax → Format
|
||||
| (atom info val) := toFmt $ repr val
|
||||
| (ident _ _ val pre scopes) :=
|
||||
let scopes := pre.map toFmt ++ scopes.reverse.map toFmt in
|
||||
let scopes := match scopes with [] := toFmt "" | _ := bracket "{" (joinSep scopes ", ") "}" in
|
||||
toFmt "`" ++ toFmt val ++ scopes
|
||||
| (node kind args scopes) :=
|
||||
let scopes := match scopes with [] := toFmt "" | _ := bracket "{" (joinSep scopes.reverse ", ") "}" in
|
||||
if kind.name = `Lean.Parser.noKind then
|
||||
sbracket $ scopes ++ joinSep (args.toList.map toFormat) line
|
||||
else
|
||||
let shorterName := kind.name.replacePrefix `Lean.Parser Name.anonymous in
|
||||
paren $ joinSep ((toFmt shorterName ++ scopes) :: args.toList.map toFormat) line
|
||||
| missing := "<missing>"
|
||||
|
||||
instance : HasToFormat Syntax := ⟨Syntax.toFormat⟩
|
||||
instance : HasToString Syntax := ⟨toString ∘ toFmt⟩
|
||||
end Syntax
|
||||
|
||||
end Parser
|
||||
|
|
|
|||
|
|
@ -1,59 +1,28 @@
|
|||
import parser
|
||||
open Parser
|
||||
local infix ` ; `:10 := Parser.andthen
|
||||
local infix ` || `:5 := Parser.orelse
|
||||
|
||||
abbrev Parser (α : Type) := ReaderT Nat (Lean.Parser.ParserM Unit Unit Unit) α
|
||||
def mkTestKind : IO SyntaxNodeKind := nextKind `test
|
||||
@[init mkTestKind] constant testKind : SyntaxNodeKind := default _
|
||||
|
||||
open Lean.Parser
|
||||
@[inline2]
|
||||
def numPair : BasicParser :=
|
||||
node testKind $
|
||||
symbol "("; number; symbol ","; number; symbol ")"
|
||||
|
||||
-- setOption pp.implicit True
|
||||
-- setOption Trace.Compiler.boxed True
|
||||
def numPairs : BasicParser :=
|
||||
many numPair
|
||||
|
||||
def testP : Parser Unit :=
|
||||
many1 (str "++" <|> str "**" <|> (str "--" *> takeUntil (= '\n') *> any *> pure ()))
|
||||
@[noinline] def test (p : BasicParser) (s : String) : IO Unit :=
|
||||
match p.run s with
|
||||
| Except.error msg := IO.println "error"
|
||||
| Except.ok stx := IO.println "ok"
|
||||
|
||||
def testP2 : Parser Unit :=
|
||||
many1 ((strLit *> whitespace *> pure ()) <|> (str "--" *> takeUntil (= '\n') *> any *> pure ()))
|
||||
|
||||
def testP3 : Parser Unit :=
|
||||
leanWhitespace
|
||||
|
||||
def testParser (x : Parser Unit) (input : String) : String :=
|
||||
match (x 0).run () () input with
|
||||
| Lean.Parser.Result.ok _ i _ _ := "Ok at " ++ toString i
|
||||
| Result.error msg i _ _ := "Error at " ++ toString i ++ ": " ++ msg
|
||||
|
||||
def IO.testParser {α : Type} [HasToString α] (x : Parser α) (input : String) : IO Unit :=
|
||||
match (x 0).run () () input with
|
||||
| Lean.Parser.Result.ok a _ _ _ := IO.println ("Ok " ++ toString a)
|
||||
| _ := throw "ERROR"
|
||||
|
||||
@[noinline] def test (p : Parser Unit) (s : String) : IO Unit :=
|
||||
IO.println (testParser p s)
|
||||
|
||||
def mkBigString : Nat → String → String
|
||||
def mkNumPairString : Nat → String → String
|
||||
| 0 s := s
|
||||
| (n+1) s := mkBigString n (s ++ "-- new comment\n")
|
||||
|
||||
def mkBigString2 : Nat → String → String
|
||||
| 0 s := s
|
||||
| (n+1) s := mkBigString2 n (s ++ "\"hello\\nworld\"\n-- comment\n")
|
||||
|
||||
def mkBigString3 : Nat → String → String
|
||||
| 0 s := s
|
||||
| (n+1) s := mkBigString3 n (s ++ "/- /- comment 1 -/ -/ \n -- comment 2 \n \t \n ")
|
||||
|
||||
def prof {α : Type} (msg : String) (p : IO α) : IO α :=
|
||||
let msg₁ := "Time for '" ++ msg ++ "':" in
|
||||
let msg₂ := "Memory usage for '" ++ msg ++ "':" in
|
||||
allocprof msg₂ (timeit msg₁ p)
|
||||
| (n+1) s := mkNumPairString n (s ++ "(" ++ toString n ++ ", " ++ toString n ++ ") -- comment\n")
|
||||
|
||||
def main (xs : List String) : IO Unit :=
|
||||
do
|
||||
let s₁ := mkBigString xs.head.toNat "",
|
||||
let s₂ := s₁ ++ "bad" ++ mkBigString 20 "",
|
||||
let s₃ := mkBigString2 xs.head.toNat "",
|
||||
let s₄ := mkBigString3 xs.head.toNat "",
|
||||
IO.println s₄.length,
|
||||
prof "Parser 1" (test testP s₁),
|
||||
prof "Parser 2" (test testP s₂),
|
||||
prof "Parser 3" (test testP2 s₃),
|
||||
prof "Parser 4" (test testP3 s₄)
|
||||
let s := mkNumPairString xs.head.toNat "" in
|
||||
test numPairs s
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue