diff --git a/tests/playground/parser/Makefile b/tests/playground/parser/Makefile index c5d1223148..15606c4264 100644 --- a/tests/playground/parser/Makefile +++ b/tests/playground/parser/Makefile @@ -1,7 +1,7 @@ LEANC=../../../bin/leanc LEAN=../../../bin/lean COMMON_SRCS = parser.lean syntax.lean filemap.lean -SRCS = $(COMMON_SRCS) test1.lean test2.lean +SRCS = $(COMMON_SRCS) test1.lean OLEANS = $(SRCS:.lean=.olean) CPPS = $(SRCS:.lean=.cpp) COMMON_OBJS = $(COMMON_SRCS:.lean=.o) @@ -31,8 +31,8 @@ depends: $(DEPS) test1: test1.o $(COMMON_OBJS) $(LEANC) -o $@ $^ -test2: test2.o $(COMMON_OBJS) - $(LEANC) -o $@ $^ +# test2: test2.o $(COMMON_OBJS) +# $(LEANC) -o $@ $^ clean: rm -f *.olean diff --git a/tests/playground/parser/parser.lean b/tests/playground/parser/parser.lean index a459754b2b..091b0e98db 100644 --- a/tests/playground/parser/parser.lean +++ b/tests/playground/parser/parser.lean @@ -1,337 +1,671 @@ -namespace Lean +import init.lean.name init.lean.parser.trie init.lean.parser.identifier +import syntax filemap + +open Lean +export Lean.Parser (Trie) +-- namespace Lean namespace Parser -abbrev Pos := String.Pos +structure FrontendConfig := +(filename : String) +(input : String) +(fileMap : FileMap) -/- -σ is the non-backtrackable State -δ is the backtrackable State -μ is the extra error Message data --/ -inductive Result (σ δ μ α : Type) -| ok {} (a : α) (i : Pos) (st : σ) (bst : δ) : Result -| error {} (msg : String) (i : Pos) (st : σ) (extra : Option μ) : Result +structure TokenConfig := +(val : String) +(lbp : Nat := 0) -inductive Result.IsOk {σ δ μ α : Type} : Result σ δ μ α → Prop -| mk (a : α) (i : Pos) (st : σ) (bst : δ) : Result.IsOk (Result.ok a i st bst) +namespace TokenConfig -theorem errorIsNotOk {σ δ μ α : Type} {msg : String} {i : Pos} {st : σ} {extra : Option μ} - (h : Result.IsOk (@Result.error σ δ μ α msg i st extra)) : False := -match h with end +def beq : TokenConfig → TokenConfig → Bool +| ⟨val₁, lbp₁⟩ ⟨val₂, lbp₂⟩ := val₁ == val₂ && lbp₁ == lbp₂ -@[inline] def unreachableError {σ δ μ α β : Type} {msg : String} {i : Pos} {st : σ} {extra : Option μ} - (h : Result.IsOk (@Result.error σ δ μ α msg i st extra)) : β := -False.elim (errorIsNotOk h) +instance : HasBeq TokenConfig := +⟨beq⟩ -def input (σ δ μ : Type) : Type := { r : Result σ δ μ Unit // r.IsOk } +end TokenConfig -@[inline] def mkInput {σ δ μ : Type} (i : Pos) (st : σ) (bst : δ) : input σ δ μ := -⟨Result.ok () i st bst, Result.IsOk.mk _ _ _ _⟩ +structure TokenCacheEntry := +(startPos stopPos : String.Pos) +(token : Syntax) -def ParserM (σ δ μ α : Type) := -String → input σ δ μ → Result σ δ μ α +structure ParserCache := +(tokenCache : Option TokenCacheEntry := none) -variables {σ δ μ α β : Type} +structure ParserConfig extends FrontendConfig := +(tokens : Trie TokenConfig) -namespace ParserM +structure ParserData := +(stxStack : Array Syntax) (pos : String.Pos) (cache : ParserCache) (errorMsg : Option String) -protected def default : ParserM σ δ μ α := -λ _ inp, - match inp with - | ⟨Result.ok _ i st bst, h⟩ := Result.error "" i st none - | ⟨Result.error _ _ _ _, h⟩ := unreachableError h +@[inline] def ParserData.hasError (d : ParserData) : Bool := +d.errorMsg != none -instance : Inhabited (ParserM σ δ μ α) := -⟨ParserM.default⟩ +@[inline] def ParserData.stackSize (d : ParserData) : Nat := +d.stxStack.size -@[inline] def run (p : ParserM σ δ μ α) (st : σ) (bst : δ) (s : String) : Result σ δ μ α := -p s (mkInput 0 st bst) +def ParserData.restore (d : ParserData) (iniStackSz : Nat) (iniPos : Nat) : ParserData := +match d with +| ⟨stack, _, cache, _⟩ := ⟨stack.shrink iniStackSz, iniPos, cache, none⟩ -@[inline] def pure (a : α) : ParserM σ δ μ α := -λ _ inp, - match inp with - | ⟨Result.ok _ it st bst, h⟩ := Result.ok a it st bst - | ⟨Result.error _ _ _ _, h⟩ := unreachableError h +def ParserData.setPos (d : ParserData) (pos : Nat) : ParserData := +match d with +| ⟨stack, _, cache, msg⟩ := ⟨stack, pos, cache, msg⟩ -@[inline] def bind (x : ParserM σ δ μ α) (f : α → ParserM σ δ μ β) : ParserM σ δ μ β := -λ str inp, - match x str inp with - | Result.ok a i st bst := f a str (mkInput i st bst) - | Result.error msg i st etx := Result.error msg i st etx +def ParserData.setCache (d : ParserData) (cache : ParserCache) : ParserData := +match d with +| ⟨stack, pos, _, msg⟩ := ⟨stack, pos, cache, msg⟩ -instance isMonad : Monad (ParserM σ δ μ) := -{pure := @ParserM.pure _ _ _, bind := @ParserM.bind _ _ _} +def ParserData.pushSyntax (d : ParserData) (n : Syntax) : ParserData := +match d with +| ⟨stack, pos, cache, msg⟩ := ⟨stack.push n, pos, cache, msg⟩ -def mkError (r : input σ δ μ) (msg : String) : Result σ δ μ α := -match r with -| ⟨Result.ok _ i st _, _⟩ := Result.error msg i st none -| ⟨Result.error _ _ _ _, h⟩ := unreachableError h +def ParserData.next (d : ParserData) (s : String) (pos : Nat) : ParserData := +d.setPos (s.next pos) -@[inline] def orelse (p q : ParserM σ δ μ α) : ParserM σ δ μ α := -λ str inp, - match inp with - | ⟨Result.ok _ i₁ _ bst₁, _⟩ := - (match p str inp with - | err@(Result.error _ i₂ st₂ _) := if i₁ == i₂ then q str (mkInput i₁ st₂ bst₁) else err - | other := other) - | ⟨Result.error _ _ _ _, h⟩ := unreachableError h +def ParserData.toErrorMsg (d : ParserData) (cfg : ParserConfig) : String := +match d.errorMsg with +| none := "" +| some msg := + let pos := cfg.fileMap.toPosition d.pos in + cfg.filename ++ ":" ++ toString pos.line ++ ":" ++ toString pos.column ++ " " ++ msg -@[inline] def failure : ParserM σ δ μ α := -λ _ inp, mkError inp "failure" +def ParserFn := String → ParserData → ParserData -instance : Alternative (ParserM σ δ μ) := -{ orelse := @ParserM.orelse _ _ _, - failure := @ParserM.failure _ _ _, - .. ParserM.isMonad } +instance : Inhabited ParserFn := +⟨λ s, id⟩ -@[inline] def currPos : input σ δ μ → Pos -| ⟨Result.ok _ i _ _, _⟩ := i -| ⟨Result.error _ _ _ _, h⟩ := unreachableError h +structure ParserInfo := +(updateTokens : Trie TokenConfig → Trie TokenConfig := λ tks, tks) +(firstToken : Option TokenConfig := none) -@[inline] def try {α : Type} (p : ParserM σ δ μ α) : ParserM σ δ μ α := -λ str inp, - match inp with - | ⟨Result.ok _ i _ _, _⟩ := (match p str inp with - | Result.error msg _ st x := Result.error msg i st x - | other := other) - | ⟨Result.error _ _ _ _, h⟩ := unreachableError h +@[inline] def andthenFn (p q : ParserFn) : ParserFn +| s d := + let d := p s d in + if d.hasError then d else q s d -@[inline] def lookahead (p : ParserM σ δ μ α) : ParserM σ δ μ α := -λ str inp, - match inp with - | ⟨Result.ok _ i _ bst, _⟩ := - (match p str inp with - | Result.ok a _ st _ := Result.ok a i st bst - | other := other) - | ⟨Result.error _ _ _ _, h⟩ := unreachableError h +@[noinline] def andthenInfo (p q : ParserInfo) : ParserInfo := +{ updateTokens := q.updateTokens ∘ p.updateTokens, + firstToken := p.firstToken } -@[specialize] def satisfy (p : Char → Bool) (errorMsg := "unexpected character") : ParserM σ δ μ Char := -λ str inp, - match inp with - | ⟨Result.ok _ i st bst, _⟩ := - if str.atEnd i then mkError inp "end of input" - else let c := str.get i in - if p c then Result.ok c (str.next i) st bst - else mkError inp errorMsg - | ⟨Result.error _ _ _ _, h⟩ := unreachableError h +def ParserData.mkNode (d : ParserData) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserData := +match d with +| ⟨stack, pos, cache, err⟩ := + if err != none && stack.size == iniStackSz then + -- If there is an error but there are no new nodes on the stack, we just return `d` + d + else + let newNode := Syntax.node k (stack.extract iniStackSz stack.size) [] in + let stack := stack.shrink iniStackSz in + let stack := stack.push newNode in + ⟨stack, pos, cache, err⟩ -@[inline] def ch (c : Char) : ParserM σ δ μ Char := -satisfy (== c) ("expected " ++ repr c) +@[inline] def nodeFn (k : SyntaxNodeKind) (p : ParserFn) : ParserFn +| s d := + let iniSz := d.stackSize in + let d := p s d in + d.mkNode k iniSz -@[inline] def any : ParserM σ δ μ Char := -satisfy (λ _, true) +@[noinline] def nodeInfo (p : ParserInfo) : ParserInfo := +{ updateTokens := p.updateTokens, + firstToken := p.firstToken } -@[specialize] partial def takeUntilAux (p : Char → Bool) : ParserM σ δ μ Unit -| str inp := - match inp with - | ⟨Result.ok _ i st bst, _⟩ := - if str.atEnd i then inp.val - else let c := str.get i in - if p c then inp.val - else takeUntilAux str (mkInput (str.next i) st bst) - | ⟨Result.error _ _ _ _, h⟩ := unreachableError h +@[inline] def orelseFn (p q : ParserFn) : ParserFn +| s d := + let iniSz := d.stackSize in + let iniPos := d.pos in + let d := p s d in + if d.hasError && d.pos == iniPos then q s (d.restore iniSz iniPos) else d -@[inline] def takeUntil (p : Char → Bool) : ParserM σ δ μ Unit := -λ str inp, takeUntilAux p str inp +@[noinline] def orelseInfo (p q : ParserInfo) : ParserInfo := +{ updateTokens := q.updateTokens ∘ p.updateTokens, + firstToken := + match p.firstToken, q.firstToken with + | some tk₁, some tk₂ := if tk₁ == tk₂ then some tk₁ else none + | _, _ := none } -partial def strAux (s : String) (errorMsg : String) (initPos : Pos) : Pos → ParserM σ δ μ Unit -| j str inp := - if s.atEnd j then inp.val - else match inp with - | ⟨Result.ok _ i st bst, _⟩ := - if str.atEnd i then Result.error errorMsg initPos st none - else if s.get j == str.get i then strAux (s.next j) str (mkInput (str.next i) st bst) - else Result.error errorMsg initPos st none - | ⟨Result.error _ _ _ _, h⟩ := unreachableError h +def ParserData.resetPos : ParserData → String.Pos → ParserData +| ⟨stack, _, cache, errorMsg⟩ pos := ⟨stack, pos, cache, errorMsg⟩ -@[inline] def str (s : String) : ParserM σ δ μ Unit := -λ str inp, - let initPos := currPos inp in - strAux s ("expected " ++ repr s) initPos 0 str inp +@[inline] def tryFn (p : ParserFn) : ParserFn +| s d := + let iniPos := d.pos in + let d := p s d in + if d.hasError then d.resetPos iniPos else d -@[specialize] partial def manyAux (a : α) (p : ParserM σ δ μ α) : ParserM σ δ μ α -| str inp := - match inp with - | ⟨Result.ok _ i₀ _ bst₀, _⟩ := - (match p str inp with - | Result.ok _ i st bst := manyAux str (mkInput i st bst) - | Result.error _ _ st _ := Result.ok a i₀ st bst₀) - | ⟨Result.error _ _ _ _, h⟩ := unreachableError h +@[noinline] def noFirstTokenInfo (info : ParserInfo) : ParserInfo := +{ updateTokens := info.updateTokens, + firstToken := none } -@[inline] def many (p : ParserM σ δ μ Unit) : ParserM σ δ μ Unit := -manyAux () p +@[inline] def optionalFn (p : ParserFn) : ParserFn := +λ s d, + let iniSz := d.stackSize in + let iniPos := d.pos in + let d := p s d in + let d := if d.hasError then d.restore iniSz iniPos else d in + d.mkNode nullKind iniSz -@[inline] def many1 (p : ParserM σ δ μ Unit) : ParserM σ δ μ Unit := -p *> many p +def ParserData.mkError (d : ParserData) (msg : String) : ParserData := +match d with +| ⟨stack, pos, cache, _⟩ := ⟨stack, pos, cache, some msg⟩ -def pos : ParserM σ δ μ Pos := -λ str inp, - match inp with - | ⟨Result.ok _ i st bst, _⟩ := Result.ok i i st bst - | ⟨Result.error _ _ _ _, h⟩ := unreachableError h +def ParserData.mkEOIError (d : ParserData) : ParserData := +d.mkError "end of input" -def error {α : Type} (msg : String) : ParserM σ δ μ α := -λ _ inp, mkError inp msg +@[specialize] partial def manyAux (p : ParserFn) : String → ParserData → ParserData +| s d := + let iniSz := d.stackSize in + let iniPos := d.pos in + let d := p s d in + if d.hasError then d.restore iniSz iniPos + else if iniPos == d.pos then d.mkError "invalid 'many' parser combinator application, parser did not consume anything" + else manyAux s d -def errorAt {α : Type} (pos : Pos) (msg : String) : ParserM σ δ μ α := -λ _ inp, match inp with -| ⟨Result.ok _ _ st _, _⟩ := Result.error msg pos st none -| ⟨Result.error _ _ _ _, h⟩ := unreachableError h +@[inline] def manyFn (p : ParserFn) : ParserFn +| s d := + let iniSz := d.stackSize in + let d := manyAux p s d in + d.mkNode nullKind iniSz -def hexDigit : ParserM σ δ μ Nat -| str inp := match inp with - | ⟨Result.ok _ i st bst, _⟩ := - if str.atEnd i then mkError inp "unexpected end of input" - else - let c := str.get i in - let i := str.next i in - if c.isDigit then Result.ok (c.toNat - '0'.toNat) i st bst - else if 'a' <= c && c <= 'f' then Result.ok (10 + c.toNat - 'a'.toNat) i st bst - else if 'A' <= c && c <= 'F' then Result.ok (10 + c.toNat - 'A'.toNat) i st bst - else mkError inp "invalid hexadecimal numeral, hexadecimal digit expected" - | ⟨Result.error _ _ _ _, h⟩ := unreachableError h +@[specialize] partial def satisfyFn (p : Char → Bool) (errorMsg : String := "unexpected character") : ParserFn +| s d := + let i := d.pos in + if s.atEnd i then d.mkEOIError + else + let c := s.get i in + if p c then d.next s i + else d.mkError errorMsg -def quotedChar : ParserM σ δ μ Char := -do p ← pos, - c ← any, - if c = '\\' then pure '\\' - else if c = '\"' then pure '\"' - else if c = '\'' then pure '\'' - else if c = 'n' then pure '\n' - else if c = 't' then pure '\t' - else if c = 'x' then do { - d₁ ← hexDigit, - d₂ ← hexDigit, - pure $ Char.ofNat (16*d₁ + d₂) } - else if c = 'u' then do { - d₁ ← hexDigit, - d₂ ← hexDigit, - d₃ ← hexDigit, - d₄ ← hexDigit, - pure $ Char.ofNat (16*(16*(16*d₁ + d₂) + d₃) + d₄) } - else errorAt p "invalid escape sequence" +@[specialize] partial def takeUntilFn (p : Char → Bool) : ParserFn +| s d := + let i := d.pos in + if s.atEnd i then d + else + let c := s.get i in + if p c then d + else takeUntilFn s (d.next s i) -partial def strLitAux : String → ParserM σ δ μ String -| out := do - c ← any, - if c == '\"' then pure out - else if c == '\\' then do c ← quotedChar, strLitAux (out.push c) - else strLitAux (out.push c) +@[specialize] def takeWhileFn (p : Char → Bool) : ParserFn := +takeUntilFn (λ c, !p c) -def strLit : ParserM σ δ μ String := -satisfy (== '\"') "expected string literal" *> strLitAux "" +@[inline] def takeWhile1Fn (p : Char → Bool) (errorMsg : String) : ParserFn := +andthenFn (satisfyFn p errorMsg) (takeWhileFn p) -partial def finishCommentBlock : Nat → ParserM σ δ μ Unit -| nesting str inp := - match inp with - | ⟨Result.ok _ i st bst, _⟩ := - if str.atEnd i then mkError inp "end of input" - else - let c := str.get i in - let i := str.next i in +partial def finishCommentBlock : Nat → ParserFn +| nesting s d := + let i := d.pos in + if s.atEnd i then d.mkEOIError + else + let c := s.get i in + let i := s.next i in + if c == '-' then + if s.atEnd i then d.mkEOIError + else + let c := s.get i in + if c == '/' then -- "-/" end of comment + if nesting == 1 then d.next s i + else finishCommentBlock (nesting-1) s (d.next s i) + else + finishCommentBlock nesting s (d.next s i) + else if c == '/' then + if s.atEnd i then d.mkEOIError + else + let c := s.get i in + if c == '-' then finishCommentBlock (nesting+1) s (d.next s i) + else finishCommentBlock nesting s (d.setPos i) + else finishCommentBlock nesting s (d.setPos i) + +/- Consume whitespace and comments -/ +partial def whitespace : ParserFn +| s d := + let i := d.pos in + if s.atEnd i then d + else + let c := s.get i in + if c.isWhitespace then whitespace s (d.next s i) + else if c == '-' then + let i := s.next i in + let c := s.get i in + if c == '-' then andthenFn (takeUntilFn (= '\n')) whitespace s (d.next s i) + else d + else if c == '/' then + let i := s.next i in + let c := s.get i in if c == '-' then - if str.atEnd i then mkError inp "end of input" - else - let c := str.get i in - if c == '/' then -- "-/" end of comment - if nesting == 1 then Result.ok () (str.next i) st bst - else finishCommentBlock (nesting-1) str (mkInput (str.next i) st bst) - else - finishCommentBlock nesting str (mkInput i st bst) - else if c == '/' then - if str.atEnd i then mkError inp "end of input" - else - let c := str.get i in - if c == '-' then finishCommentBlock (nesting+1) str (mkInput (str.next i) st bst) - else finishCommentBlock nesting str (mkInput i st bst) - else finishCommentBlock nesting str (mkInput i st bst) - | ⟨Result.error _ _ _ _, h⟩ := unreachableError h + let i := s.next i in + let c := s.get i in + if c == '-' then d -- "/--" doc comment is an actual token + else andthenFn (finishCommentBlock 1) whitespace s (d.next s i) + else d + else d -partial def leanWhitespace : ParserM σ δ μ Unit -| str inp := - match inp with - | ⟨Result.ok _ i st bst, _⟩ := - if str.atEnd i then inp.val +def mkEmptySubstringAt (s : String) (p : Nat) : Substring := +{str := s, startPos := p, stopPos := p } + +private def rawAux (startPos : Nat) (trailingWs : Bool) : ParserFn +| s d := + let stopPos := d.pos in + let leading := mkEmptySubstringAt s startPos in + let val := s.extract startPos stopPos in + if trailingWs then + let d := whitespace s d in + let stopPos' := d.pos in + let trailing : Substring := { str := s, startPos := stopPos, stopPos := stopPos' } in + let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in + d.pushSyntax atom + else + let trailing := mkEmptySubstringAt s stopPos in + let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in + d.pushSyntax atom + +/-- Match an arbitrary Parser and return the consumed String in a `Syntax.atom`. -/ +@[inline] def rawFn (p : ParserFn) (trailingWs := false) : ParserFn +| s d := + let startPos := d.pos in + let d := p s d in + if d.hasError then d else rawAux startPos trailingWs s d + +def hexDigitFn : ParserFn +| s d := + let i := d.pos in + if s.atEnd i then d.mkEOIError + else + let c := s.get i in + let i := s.next i in + if c.isDigit || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') then d.setPos i + else d.mkError "invalid hexadecimal numeral, hexadecimal digit expected" + +def quotedCharFn : ParserFn +| s d := + let i := d.pos in + if s.atEnd i then d.mkEOIError + else + let c := s.get i in + if c == '\\' || c == '\"' || c == '\'' || c == '\n' || c == '\t' then + d.next s i + else if c == 'x' then + andthenFn hexDigitFn hexDigitFn s (d.next s i) + else if c == 'u' then + andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) s (d.next s i) else - let c := str.get i in - if c.isWhitespace then leanWhitespace str (mkInput (str.next i) st bst) - else if c == '-' then - let i := str.next i in - let c := str.get i in - if c == '-' then ((takeUntil (= '\n')) *> leanWhitespace) str (mkInput i st bst) - else inp.val - else if c == '/' then - let i := str.next i in - let c := str.get i in - if c == '-' then - let i := str.next i in - let c := str.get i in - if c == '-' then inp.val -- "/--" doc comment is an actual token - else ((finishCommentBlock 1) *> leanWhitespace) str (mkInput i st bst) - else inp.val - else inp.val - | ⟨Result.error _ _ _ _, h⟩ := unreachableError h + d.mkError "invalid escape sequence" -end ParserM +def mkStrLitKind : IO SyntaxNodeKind := nextKind `strLit +@[init mkStrLitKind] constant strLitKind : SyntaxNodeKind := default _ -class monadParser (σ : outParam Type) (δ : outParam Type) (μ : outParam Type) (m : Type → Type) := -(lift {} {α : Type} : ParserM σ δ μ α → m α) -(map {} {α : Type} : (Π β, ParserM σ δ μ β → ParserM σ δ μ β) → m α → m α) +/-- Push `(Syntax.node tk )` into syntax stack -/ +def mkNodeToken (k : SyntaxNodeKind) (startPos : Nat) (s : String) (d : ParserData) : ParserData := +let stopPos := d.pos in +let leading := mkEmptySubstringAt s startPos in +let val := s.extract startPos stopPos in +let d := whitespace s d in +let wsStopPos := d.pos in +let trailing := { Substring . str := s, startPos := stopPos, stopPos := wsStopPos } in +let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in +let tk := Syntax.node k (Array.singleton atom) [] in +d.pushSyntax tk -instance monadParserBase : monadParser σ δ μ (ParserM σ δ μ) := -{ lift := λ α, id, - map := λ α f x, f α x } +partial def strLitFnAux (startPos : Nat) : ParserFn +| s d := + let i := d.pos in + if s.atEnd i then d.mkEOIError + else + let c := s.get i in + let d := d.setPos (s.next i) in + if c == '\"' then + mkNodeToken strLitKind startPos s d + else if c == '\\' then andthenFn quotedCharFn strLitFnAux s d + else strLitFnAux s d -instance monadParserTrans {m n : Type → Type} [HasMonadLift m n] [MonadFunctor m m n n] [monadParser σ δ μ m] : monadParser σ δ μ n := -{ lift := λ α p, monadLift (monadParser.lift p : m α), - map := λ α f x, monadMap (λ β x, (monadParser.map @f x : m β)) x } +def mkNumberKind : IO SyntaxNodeKind := nextKind `number +@[init mkNumberKind] constant numberKind : SyntaxNodeKind := default _ -class monadParserAux (σ : outParam Type) (δ : outParam Type) (μ : outParam Type) (m : Type → Type) := -(map {} {α : Type} : (ParserM σ δ μ α → ParserM σ δ μ α) → m α → m α) +def decimalNumberFn (startPos : Nat) : ParserFn +| s d := + let d := takeWhileFn (λ c, c.isDigit) s d in + let i := d.pos in + let c := s.get i in + let d := + if c == '.' then + let i := s.next i in + let c := s.get i in + if c.isDigit then + takeWhileFn (λ c, c.isDigit) s (d.setPos i) + else + d + else + d in + mkNodeToken numberKind startPos s d -instance monadParserAuxBase : monadParserAux σ δ μ (ParserM σ δ μ) := -{ map := λ α, id } +def binNumberFn (startPos : Nat) : ParserFn +| s d := + let d := takeWhile1Fn (λ c, c == '0' || c == '1') "expected binary number" s d in + mkNodeToken numberKind startPos s d -instance monadParserAuxReader {m : Type → Type} {ρ : Type} [Monad m] [monadParserAux σ δ μ m] : monadParserAux σ δ μ (ReaderT ρ m) := -{ map := λ α f x r, (monadParserAux.map f : m α → m α) (x r) } +def octalNumberFn (startPos : Nat) : ParserFn +| s d := + let d := takeWhile1Fn (λ c, '0' ≤ c && c ≤ '7') "expected octal number" s d in + mkNodeToken numberKind startPos s d -section -variables {m : Type → Type} [monadParser σ δ μ m] +def hexNumberFn (startPos : Nat) : ParserFn +| s d := + let d := takeWhile1Fn (λ c, ('0' ≤ c && c ≤ '9') || ('a' ≤ c && c ≤ 'f') || ('A' ≤ c && c ≤ 'F')) "expected hexadecimal number" s d in + mkNodeToken numberKind startPos s d -@[inline] def satisfy (p : Char → Bool) : m Char := monadParser.lift (ParserM.satisfy p) -def ch (c : Char) : m Char := monadParser.lift (ParserM.ch c) -def alpha : m Char := satisfy Char.isAlpha -def digit : m Char := satisfy Char.isDigit -def upper : m Char := satisfy Char.isUpper -def lower : m Char := satisfy Char.isLower -def any : m Char := satisfy (λ _, true) +def numberFnAux : ParserFn +| s d := + let startPos := d.pos in + if s.atEnd startPos then d.mkEOIError + else + let c := s.get startPos in + if c == '0' then + let i := s.next startPos in + let c := s.get i in + if c == 'b' || c == 'B' then + binNumberFn startPos s (d.next s i) + else if c == 'o' || c == 'O' then + octalNumberFn startPos s (d.next s i) + else if c == 'x' || c == 'X' then + hexNumberFn startPos s (d.next s i) + else + decimalNumberFn startPos s (d.next s i) + else if c.isDigit then + decimalNumberFn startPos s (d.next s startPos) + else + d.mkError "expected numeral" -@[inline] def takeUntil (p : Char → Bool) : m Unit := monadParser.lift (ParserM.takeUntil p) +def isIdCont : String → ParserData → Bool +| s d := + let i := d.pos in + let c := s.get i in + if c == '.' then + let i := s.next i in + if s.atEnd i then + false + else + let c := s.get i in + isIdFirst c || isIdBeginEscape c + else + false -@[inline] def str (s : String) : m Unit := monadParser.lift (ParserM.str s) +private def isToken (idStartPos idStopPos : Nat) (tk : Option TokenConfig) : Bool := +match tk with +| none := false +| some tk := + -- if a token is both a symbol and a valid identifier (i.e. a keyword), + -- we want it to be recognized as a symbol + tk.val.bsize ≥ idStopPos - idStopPos -@[inline] def try (p : m α) : m α := -monadParser.map (λ _ p, ParserM.try p) p -@[inline] def lookahead (p : m α) : m α := -monadParser.map (λ _ p, ParserM.lookahead p) p +def mkTokenAndFixPos (startPos : Nat) (tk : Option TokenConfig) (s : String) (d : ParserData) : ParserData := +match tk with +| none := + let d := d.setPos startPos in + d.mkError "token expected" +| some tk := + let leading := mkEmptySubstringAt s startPos in + let val := tk.val in + let stopPos := startPos + val.bsize in + let d := d.setPos stopPos in + let wsStopPos := d.pos in + let trailing := { Substring . str := s, startPos := stopPos, stopPos := wsStopPos } in + let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in + d.pushSyntax atom -@[inline] def takeWhile (p : Char → Bool) : m Unit := takeUntil (λ c, !p c) +def mkIdResult (startPos : Nat) (tk : Option TokenConfig) (val : Name) (s : String) (d : ParserData) : ParserData := +let stopPos := d.pos in +if isToken startPos stopPos tk then + mkTokenAndFixPos startPos tk s d +else + let rawVal : Substring := { str := s, startPos := startPos, stopPos := stopPos } in + let d := whitespace s d in + let trailingStopPos := d.pos in + let leading := mkEmptySubstringAt s startPos in + let trailing : Substring := { str := s, startPos := stopPos, stopPos := trailingStopPos } in + let info : SourceInfo := {leading := leading, trailing := trailing, pos := startPos} in + let atom := Syntax.ident (some info) rawVal val [] [] in + d.pushSyntax atom -@[inline] def whitespace : m Unit := takeWhile Char.isWhitespace +partial def identFnAux (startPos : Nat) (tk : Option TokenConfig) : Name → ParserFn +| r s d := + let i := d.pos in + if s.atEnd i then d.mkEOIError + else + let c := s.get i in + if isIdBeginEscape c then + let startPart := s.next i in + let d := takeUntilFn isIdEndEscape s (d.setPos startPart) in + let stopPart := d.pos in + let d := satisfyFn isIdEndEscape "end of escaped identifier expected" s d in + if d.hasError then d + else + let r := Name.mkString r (s.extract startPart stopPart) in + if isIdCont s d then + identFnAux r s d + else + mkIdResult startPos tk r s d + else if isIdFirst c then + let startPart := i in + let d := takeWhileFn isIdRest s (d.next s i) in + let stopPart := d.pos in + let r := Name.mkString r (s.extract startPart stopPart) in + if isIdCont s d then + identFnAux r s d + else + mkIdResult startPart tk r s d + else + mkTokenAndFixPos startPos tk s d -@[inline] def strLit : m String := monadParser.lift (ParserM.strLit) +structure AbsParser (ρ : Type) := +(info : Thunk ParserInfo := Thunk.mk (λ _, {})) +(fn : ρ) -@[inline] def leanWhitespace : m Unit := monadParser.lift ParserM.leanWhitespace -end +abbrev Parser := AbsParser ParserFn -section -variables {m : Type → Type} [monadParserAux σ δ μ m] +class ParserFnLift (ρ : Type) := +(lift {} : ParserFn → ρ) +(map : (ParserFn → ParserFn) → ρ → ρ) +(map₂ : (ParserFn → ParserFn → ParserFn) → ρ → ρ → ρ) -@[inline] def many (p : m Unit) : m Unit := monadParserAux.map ParserM.many p -@[inline] def many1 (p : m Unit) : m Unit := monadParserAux.map ParserM.many1 p +instance parserLiftInhabited {ρ : Type} [ParserFnLift ρ] : Inhabited ρ := +⟨ParserFnLift.lift (default _)⟩ -end +instance idParserLift : ParserFnLift ParserFn := +{ lift := λ p, p, + map := λ m p, m p, + map₂ := λ m p1 p2, m p1 p2 } + +@[inline] +def liftParser {ρ : Type} [ParserFnLift ρ] (info : Thunk ParserInfo) (fn : ParserFn) : AbsParser ρ := +{ info := info, fn := ParserFnLift.lift fn } + +@[inline] +def mapParser {ρ : Type} [ParserFnLift ρ] (infoFn : ParserInfo → ParserInfo) (pFn : ParserFn → ParserFn) : AbsParser ρ → AbsParser ρ := +λ p, { info := Thunk.mk (λ _, infoFn p.info.get), fn := ParserFnLift.map pFn p.fn } + +@[inline] +def mapParser₂ {ρ : Type} [ParserFnLift ρ] (infoFn : ParserInfo → ParserInfo → ParserInfo) (pFn : ParserFn → ParserFn → ParserFn) + : AbsParser ρ → AbsParser ρ → AbsParser ρ := +λ p q, { info := Thunk.mk (λ _, infoFn p.info.get q.info.get), fn := ParserFnLift.map₂ pFn p.fn q.fn } + +def EnvParserFn (α : Type) (ρ : Type) := +α → ρ + +def RecParserFn (α ρ : Type) := +EnvParserFn (α → ρ) ρ + +instance envParserLift (α ρ : Type) [ParserFnLift ρ] : ParserFnLift (EnvParserFn α ρ) := +{ lift := λ p a, ParserFnLift.lift p, + map := λ m p a, ParserFnLift.map m (p a), + map₂ := λ m p1 p2 a, ParserFnLift.map₂ m (p1 a) (p2 a) } + +instance recParserLift (α ρ : Type) [ParserFnLift ρ] : ParserFnLift (RecParserFn α ρ) := +inferInstanceAs (ParserFnLift (EnvParserFn (α → ρ) ρ)) + +namespace RecParserFn +variables {α ρ : Type} + +@[inline] def recurse (a : α) : RecParserFn α ρ := +λ p, p a + +@[inline] def run [ParserFnLift ρ] (x : RecParserFn α ρ) (rec : α → RecParserFn α ρ) : ρ := +x (fix (λ f a, rec a f)) + +end RecParserFn + +@[inline] def andthen {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ → AbsParser ρ := +mapParser₂ andthenInfo andthenFn + +@[inline] def node {ρ : Type} [ParserFnLift ρ] (k : SyntaxNodeKind) : AbsParser ρ → AbsParser ρ := +mapParser nodeInfo (nodeFn k) + +@[inline] def orelse {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ → AbsParser ρ := +mapParser₂ orelseInfo orelseFn + +@[inline] def try {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ := +mapParser noFirstTokenInfo tryFn + +@[inline] def many {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ := +mapParser noFirstTokenInfo manyFn + +@[inline] def optional {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ := +mapParser noFirstTokenInfo optionalFn + +@[inline] def many1 {ρ : Type} [ParserFnLift ρ] (p : AbsParser ρ) : AbsParser ρ := +andthen p (many p) + +abbrev BasicParserFn : Type := EnvParserFn ParserConfig ParserFn +abbrev BasicParser : Type := AbsParser BasicParserFn +abbrev CmdParserFn (ρ : Type) : Type := EnvParserFn ρ (RecParserFn Unit ParserFn) +abbrev TermParserFn : Type := RecParserFn Nat (CmdParserFn ParserConfig) +abbrev TermParser : Type := AbsParser TermParserFn +abbrev TrailingTermParser : Type := AbsParser (EnvParserFn Syntax TermParserFn) + +def TokenMap (α : Type) := RBMap Name (List α) Name.quickLt + +structure CommandParserConfig extends ParserConfig := +(leadingTermParsers : TokenMap TermParser) +(trailingTermParsers : TokenMap TrailingTermParser) +-- local Term parsers (such as from `local notation`) hide previous parsers instead of overloading them +(localLeadingTermParsers : TokenMap TermParser := mkRBMap _ _ _) +(localTrailingTermParsers : TokenMap TrailingTermParser := mkRBMap _ _ _) + +abbrev CommandParser : Type := AbsParser (CmdParserFn CommandParserConfig) + +@[inline] def Term.parser (rbp : Nat := 0) : TermParser := +{ fn := RecParserFn.recurse rbp } + +@[inline] def Command.parser : CommandParser := +{ fn := λ _, RecParserFn.recurse () } + +@[inline] def basicParser2TermParser (p : BasicParser) : TermParser := +{ info := Thunk.mk (λ _, p.info.get), fn := λ _ cfg _, p.fn cfg } + +instance basic2term : HasCoe BasicParser TermParser := +⟨basicParser2TermParser⟩ + +@[inline] def basicParser2CmdParser (p : BasicParser) : CommandParser := +{ info := Thunk.mk (λ _, p.info.get), fn := λ cfg _, p.fn cfg.toParserConfig } + +instance basicmd : HasCoe BasicParser CommandParser := +⟨basicParser2CmdParser⟩ + +private def tokenFnAux : BasicParserFn +| cfg s d := + let i := d.pos in + let c := s.get i in + if c == '\"' then + strLitFnAux i s (d.next s i) + else if c.isDigit then + numberFnAux s d + else + let (_, tk) := cfg.tokens.matchPrefix s i in + identFnAux i tk Name.anonymous s d + +private def updateCache (startPos : Nat) (d : ParserData) : ParserData := +match d with +| ⟨stack, pos, cache, none⟩ := + if stack.size == 0 then d + else + let tk := stack.back in + ⟨stack, pos, { tokenCache := some { startPos := startPos, stopPos := pos, token := tk } }, none⟩ +| other := other + +def tokenFn : BasicParserFn +| cfg s d := + let i := d.pos in + if s.atEnd i then d.mkEOIError + else + match d.cache with + | { tokenCache := some tkc } := + if tkc.startPos == i then + let d := d.pushSyntax tkc.token in + d.setPos tkc.stopPos + else + let d := tokenFnAux cfg s d in + updateCache i d + | _ := + let d := tokenFnAux cfg s d in + updateCache i d + +def symbolFnAux (sym : String) (errorMsg : String) : BasicParserFn +| cfg s d := + let d := tokenFn cfg s d in + if d.hasError then + d.mkError errorMsg + else + match d.stxStack.back with + | Syntax.atom _ sym' := if sym == sym' then d else d.mkError errorMsg + | _ := d.mkError errorMsg + +@[inline] def symbolFn (sym : String) : BasicParserFn := +symbolFnAux sym ("expected '" ++ sym ++ "'") + +def symbolInfo (sym : String) (lbp : Nat) : ParserInfo := +{ updateTokens := λ trie, trie.insert sym { val := sym, lbp := lbp }, + firstToken := some { val := sym, lbp := lbp } } + +@[inline] def symbol (sym : String) (lbp : Nat := 0) : BasicParser := +{ info := Thunk.mk (λ _, symbolInfo sym lbp), + fn := symbolFn sym } + +def numberFn : BasicParserFn +| cfg s d := + let d := tokenFn cfg s d in + if d.hasError || !(d.stxStack.back.isOfKind numberKind) then + d.mkError "expected numeral" + else + d + +@[inline] def number : BasicParser := +{ fn := numberFn } + +def strLitFn : BasicParserFn +| cfg s d := + let d := tokenFn cfg s d in + if d.hasError || !(d.stxStack.back.isOfKind strLitKind) then + d.mkError "expected string literal" + else + d + +@[inline] def strLit : BasicParser := +{ fn := numberFn } + +def mkFrontendConfig (filename input : String) : FrontendConfig := +{ filename := filename, + input := input, + fileMap := input.toFileMap } + +def BasicParser.run (p : BasicParser) (input : String) (filename : String := "") : Except String Syntax := +let frontendCfg := mkFrontendConfig filename input in +let tokens := p.info.get.updateTokens {} in +let cfg : ParserConfig := { tokens := tokens, .. frontendCfg } in +let d : ParserData := { stxStack := Array.empty, pos := 0, cache := {}, errorMsg := none } in +let d := p.fn cfg input d in +if d.hasError then + Except.error (d.toErrorMsg cfg) +else + Except.ok d.stxStack.back end Parser -end Lean +-- end Lean diff --git a/tests/playground/parser/parser2.lean b/tests/playground/parser/parser2.lean deleted file mode 100644 index 4731fd4be8..0000000000 --- a/tests/playground/parser/parser2.lean +++ /dev/null @@ -1,716 +0,0 @@ -import init.lean.name init.lean.parser.trie init.lean.parser.identifier -import syntax filemap - -open Lean -export Lean.Parser (Trie) --- namespace Lean -namespace Parser - -structure FrontendConfig := -(filename : String) -(input : String) -(fileMap : FileMap) - -structure TokenConfig := -(val : String) -(lbp : Nat := 0) - -namespace TokenConfig - -def beq : TokenConfig → TokenConfig → Bool -| ⟨val₁, lbp₁⟩ ⟨val₂, lbp₂⟩ := val₁ == val₂ && lbp₁ == lbp₂ - -instance : HasBeq TokenConfig := -⟨beq⟩ - -end TokenConfig - -structure TokenCacheEntry := -(startPos stopPos : String.Pos) -(token : Syntax) - -structure ParserCache := -(tokenCache : Option TokenCacheEntry := none) - -structure ParserConfig extends FrontendConfig := -(tokens : Trie TokenConfig) - -structure ParserData := -(stxStack : Array Syntax) (pos : String.Pos) (cache : ParserCache) (errorMsg : Option String) - -@[inline] def ParserData.hasError (d : ParserData) : Bool := -d.errorMsg != none - -@[inline] def ParserData.stackSize (d : ParserData) : Nat := -d.stxStack.size - -def ParserData.restore (d : ParserData) (iniStackSz : Nat) (iniPos : Nat) : ParserData := -match d with -| ⟨stack, _, cache, _⟩ := ⟨stack.shrink iniStackSz, iniPos, cache, none⟩ - -def ParserData.setPos (d : ParserData) (pos : Nat) : ParserData := -match d with -| ⟨stack, _, cache, msg⟩ := ⟨stack, pos, cache, msg⟩ - -def ParserData.setCache (d : ParserData) (cache : ParserCache) : ParserData := -match d with -| ⟨stack, pos, _, msg⟩ := ⟨stack, pos, cache, msg⟩ - -def ParserData.pushSyntax (d : ParserData) (n : Syntax) : ParserData := -match d with -| ⟨stack, pos, cache, msg⟩ := ⟨stack.push n, pos, cache, msg⟩ - -def ParserData.next (d : ParserData) (s : String) (pos : Nat) : ParserData := -d.setPos (s.next pos) - -def ParserData.toErrorMsg (d : ParserData) (cfg : ParserConfig) : String := -match d.errorMsg with -| none := "" -| some msg := - let pos := cfg.fileMap.toPosition d.pos in - cfg.filename ++ ":" ++ toString pos.line ++ ":" ++ toString pos.column ++ " " ++ msg - -def ParserFn := String → ParserData → ParserData - -instance : Inhabited ParserFn := -⟨λ s, id⟩ - -structure ParserInfo := -(updateTokens : Trie TokenConfig → Trie TokenConfig := λ tks, tks) -(firstToken : Option TokenConfig := none) - -@[inline] def andthenFn (p q : ParserFn) : ParserFn -| s d := - let d := p s d in - if d.hasError then d else q s d - -@[noinline] def andthenInfo (p q : ParserInfo) : ParserInfo := -{ updateTokens := q.updateTokens ∘ p.updateTokens, - firstToken := p.firstToken } - -def ParserData.mkNode (d : ParserData) (k : SyntaxNodeKind) (iniStackSz : Nat) : ParserData := -match d with -| ⟨stack, pos, cache, err⟩ := - if err != none && stack.size == iniStackSz then - -- If there is an error but there are no new nodes on the stack, we just return `d` - d - else - let newNode := Syntax.node k (stack.extract iniStackSz stack.size) [] in - let stack := stack.shrink iniStackSz in - let stack := stack.push newNode in - ⟨stack, pos, cache, err⟩ - -@[inline] def nodeFn (k : SyntaxNodeKind) (p : ParserFn) : ParserFn -| s d := - let iniSz := d.stackSize in - let d := p s d in - d.mkNode k iniSz - -@[noinline] def nodeInfo (p : ParserInfo) : ParserInfo := -{ updateTokens := p.updateTokens, - firstToken := p.firstToken } - -@[inline] def orelseFn (p q : ParserFn) : ParserFn -| s d := - let iniSz := d.stackSize in - let iniPos := d.pos in - let d := p s d in - if d.hasError && d.pos == iniPos then q s (d.restore iniSz iniPos) else d - -@[noinline] def orelseInfo (p q : ParserInfo) : ParserInfo := -{ updateTokens := q.updateTokens ∘ p.updateTokens, - firstToken := - match p.firstToken, q.firstToken with - | some tk₁, some tk₂ := if tk₁ == tk₂ then some tk₁ else none - | _, _ := none } - -def ParserData.resetPos : ParserData → String.Pos → ParserData -| ⟨stack, _, cache, errorMsg⟩ pos := ⟨stack, pos, cache, errorMsg⟩ - -@[inline] def tryFn (p : ParserFn) : ParserFn -| s d := - let iniPos := d.pos in - let d := p s d in - if d.hasError then d.resetPos iniPos else d - -@[noinline] def noFirstTokenInfo (info : ParserInfo) : ParserInfo := -{ updateTokens := info.updateTokens, - firstToken := none } - -@[inline] def optionalFn (p : ParserFn) : ParserFn := -λ s d, - let iniSz := d.stackSize in - let iniPos := d.pos in - let d := p s d in - let d := if d.hasError then d.restore iniSz iniPos else d in - d.mkNode nullKind iniSz - -def ParserData.mkError (d : ParserData) (msg : String) : ParserData := -match d with -| ⟨stack, pos, cache, _⟩ := ⟨stack, pos, cache, some msg⟩ - -def ParserData.mkEOIError (d : ParserData) : ParserData := -d.mkError "end of input" - -@[specialize] partial def manyAux (p : ParserFn) : String → ParserData → ParserData -| s d := - let iniSz := d.stackSize in - let iniPos := d.pos in - let d := p s d in - if d.hasError then d.restore iniSz iniPos - else if iniPos == d.pos then d.mkError "invalid 'many' parser combinator application, parser did not consume anything" - else manyAux s d - -@[inline] def manyFn (p : ParserFn) : ParserFn -| s d := - let iniSz := d.stackSize in - let d := manyAux p s d in - d.mkNode nullKind iniSz - -@[specialize] partial def satisfyFn (p : Char → Bool) (errorMsg : String := "unexpected character") : ParserFn -| s d := - let i := d.pos in - if s.atEnd i then d.mkEOIError - else - let c := s.get i in - if p c then d.next s i - else d.mkError errorMsg - -@[specialize] partial def takeUntilFn (p : Char → Bool) : ParserFn -| s d := - let i := d.pos in - if s.atEnd i then d - else - let c := s.get i in - if p c then d - else takeUntilFn s (d.next s i) - -@[specialize] def takeWhileFn (p : Char → Bool) : ParserFn := -takeUntilFn (λ c, !p c) - -@[inline] def takeWhile1Fn (p : Char → Bool) (errorMsg : String) : ParserFn := -andthenFn (satisfyFn p errorMsg) (takeWhileFn p) - -partial def finishCommentBlock : Nat → ParserFn -| nesting s d := - let i := d.pos in - if s.atEnd i then d.mkEOIError - else - let c := s.get i in - let i := s.next i in - if c == '-' then - if s.atEnd i then d.mkEOIError - else - let c := s.get i in - if c == '/' then -- "-/" end of comment - if nesting == 1 then d.next s i - else finishCommentBlock (nesting-1) s (d.next s i) - else - finishCommentBlock nesting s (d.next s i) - else if c == '/' then - if s.atEnd i then d.mkEOIError - else - let c := s.get i in - if c == '-' then finishCommentBlock (nesting+1) s (d.next s i) - else finishCommentBlock nesting s (d.setPos i) - else finishCommentBlock nesting s (d.setPos i) - -/- Consume whitespace and comments -/ -partial def whitespace : ParserFn -| s d := - let i := d.pos in - if s.atEnd i then d - else - let c := s.get i in - if c.isWhitespace then whitespace s (d.next s i) - else if c == '-' then - let i := s.next i in - let c := s.get i in - if c == '-' then andthenFn (takeUntilFn (= '\n')) whitespace s (d.next s i) - else d - else if c == '/' then - let i := s.next i in - let c := s.get i in - if c == '-' then - let i := s.next i in - let c := s.get i in - if c == '-' then d -- "/--" doc comment is an actual token - else andthenFn (finishCommentBlock 1) whitespace s (d.next s i) - else d - else d - -def mkEmptySubstringAt (s : String) (p : Nat) : Substring := -{str := s, startPos := p, stopPos := p } - -private def rawAux (startPos : Nat) (trailingWs : Bool) : ParserFn -| s d := - let stopPos := d.pos in - let leading := mkEmptySubstringAt s startPos in - let val := s.extract startPos stopPos in - if trailingWs then - let d := whitespace s d in - let stopPos' := d.pos in - let trailing : Substring := { str := s, startPos := stopPos, stopPos := stopPos' } in - let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in - d.pushSyntax atom - else - let trailing := mkEmptySubstringAt s stopPos in - let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in - d.pushSyntax atom - -/-- Match an arbitrary Parser and return the consumed String in a `Syntax.atom`. -/ -@[inline] def rawFn (p : ParserFn) (trailingWs := false) : ParserFn -| s d := - let startPos := d.pos in - let d := p s d in - if d.hasError then d else rawAux startPos trailingWs s d - -def hexDigitFn : ParserFn -| s d := - let i := d.pos in - if s.atEnd i then d.mkEOIError - else - let c := s.get i in - let i := s.next i in - if c.isDigit || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') then d.setPos i - else d.mkError "invalid hexadecimal numeral, hexadecimal digit expected" - -def quotedCharFn : ParserFn -| s d := - let i := d.pos in - if s.atEnd i then d.mkEOIError - else - let c := s.get i in - if c == '\\' || c == '\"' || c == '\'' || c == '\n' || c == '\t' then - d.next s i - else if c == 'x' then - andthenFn hexDigitFn hexDigitFn s (d.next s i) - else if c == 'u' then - andthenFn hexDigitFn (andthenFn hexDigitFn (andthenFn hexDigitFn hexDigitFn)) s (d.next s i) - else - d.mkError "invalid escape sequence" - -def mkStrLitKind : IO SyntaxNodeKind := nextKind `strLit -@[init mkStrLitKind] constant strLitKind : SyntaxNodeKind := default _ - -/-- Push `(Syntax.node tk )` into syntax stack -/ -def mkNodeToken (k : SyntaxNodeKind) (startPos : Nat) (s : String) (d : ParserData) : ParserData := -let stopPos := d.pos in -let leading := mkEmptySubstringAt s startPos in -let val := s.extract startPos stopPos in -let d := whitespace s d in -let wsStopPos := d.pos in -let trailing := { Substring . str := s, startPos := stopPos, stopPos := wsStopPos } in -let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in -let tk := Syntax.node k (Array.singleton atom) [] in -d.pushSyntax tk - -partial def strLitFnAux (startPos : Nat) : ParserFn -| s d := - let i := d.pos in - if s.atEnd i then d.mkEOIError - else - let c := s.get i in - let d := d.setPos (s.next i) in - if c == '\"' then - mkNodeToken strLitKind startPos s d - else if c == '\\' then andthenFn quotedCharFn strLitFnAux s d - else strLitFnAux s d - -def mkNumberKind : IO SyntaxNodeKind := nextKind `number -@[init mkNumberKind] constant numberKind : SyntaxNodeKind := default _ - -def decimalNumberFn (startPos : Nat) : ParserFn -| s d := - let d := takeWhileFn (λ c, c.isDigit) s d in - let i := d.pos in - let c := s.get i in - let d := - if c == '.' then - let i := s.next i in - let c := s.get i in - if c.isDigit then - takeWhileFn (λ c, c.isDigit) s (d.setPos i) - else - d - else - d in - mkNodeToken numberKind startPos s d - -def binNumberFn (startPos : Nat) : ParserFn -| s d := - let d := takeWhile1Fn (λ c, c == '0' || c == '1') "expected binary number" s d in - mkNodeToken numberKind startPos s d - -def octalNumberFn (startPos : Nat) : ParserFn -| s d := - let d := takeWhile1Fn (λ c, '0' ≤ c && c ≤ '7') "expected octal number" s d in - mkNodeToken numberKind startPos s d - -def hexNumberFn (startPos : Nat) : ParserFn -| s d := - let d := takeWhile1Fn (λ c, ('0' ≤ c && c ≤ '9') || ('a' ≤ c && c ≤ 'f') || ('A' ≤ c && c ≤ 'F')) "expected hexadecimal number" s d in - mkNodeToken numberKind startPos s d - -def numberFnAux : ParserFn -| s d := - let startPos := d.pos in - if s.atEnd startPos then d.mkEOIError - else - let c := s.get startPos in - if c == '0' then - let i := s.next startPos in - let c := s.get i in - if c == 'b' || c == 'B' then - binNumberFn startPos s (d.next s i) - else if c == 'o' || c == 'O' then - octalNumberFn startPos s (d.next s i) - else if c == 'x' || c == 'X' then - hexNumberFn startPos s (d.next s i) - else - decimalNumberFn startPos s (d.next s i) - else if c.isDigit then - decimalNumberFn startPos s (d.next s startPos) - else - d.mkError "expected numeral" - -def isIdCont : String → ParserData → Bool -| s d := - let i := d.pos in - let c := s.get i in - if c == '.' then - let i := s.next i in - if s.atEnd i then - false - else - let c := s.get i in - isIdFirst c || isIdBeginEscape c - else - false - -private def isToken (idStartPos idStopPos : Nat) (tk : Option TokenConfig) : Bool := -match tk with -| none := false -| some tk := - -- if a token is both a symbol and a valid identifier (i.e. a keyword), - -- we want it to be recognized as a symbol - tk.val.bsize ≥ idStopPos - idStopPos - - -def mkTokenAndFixPos (startPos : Nat) (tk : Option TokenConfig) (s : String) (d : ParserData) : ParserData := -match tk with -| none := - let d := d.setPos startPos in - d.mkError "token expected" -| some tk := - let leading := mkEmptySubstringAt s startPos in - let val := tk.val in - let stopPos := startPos + val.bsize in - let d := d.setPos stopPos in - let wsStopPos := d.pos in - let trailing := { Substring . str := s, startPos := stopPos, stopPos := wsStopPos } in - let atom := Syntax.atom (some { leading := leading, pos := startPos, trailing := trailing }) val in - d.pushSyntax atom - -def mkIdResult (startPos : Nat) (tk : Option TokenConfig) (val : Name) (s : String) (d : ParserData) : ParserData := -let stopPos := d.pos in -if isToken startPos stopPos tk then - mkTokenAndFixPos startPos tk s d -else - let rawVal : Substring := { str := s, startPos := startPos, stopPos := stopPos } in - let d := whitespace s d in - let trailingStopPos := d.pos in - let leading := mkEmptySubstringAt s startPos in - let trailing : Substring := { str := s, startPos := stopPos, stopPos := trailingStopPos } in - let info : SourceInfo := {leading := leading, trailing := trailing, pos := startPos} in - let atom := Syntax.ident (some info) rawVal val [] [] in - d.pushSyntax atom - -partial def identFnAux (startPos : Nat) (tk : Option TokenConfig) : Name → ParserFn -| r s d := - let i := d.pos in - if s.atEnd i then d.mkEOIError - else - let c := s.get i in - if isIdBeginEscape c then - let startPart := s.next i in - let d := takeUntilFn isIdEndEscape s (d.setPos startPart) in - let stopPart := d.pos in - let d := satisfyFn isIdEndEscape "end of escaped identifier expected" s d in - if d.hasError then d - else - let r := Name.mkString r (s.extract startPart stopPart) in - if isIdCont s d then - identFnAux r s d - else - mkIdResult startPos tk r s d - else if isIdFirst c then - let startPart := i in - let d := takeWhileFn isIdRest s (d.next s i) in - let stopPart := d.pos in - let r := Name.mkString r (s.extract startPart stopPart) in - if isIdCont s d then - identFnAux r s d - else - mkIdResult startPart tk r s d - else - mkTokenAndFixPos startPos tk s d - -structure AbsParser (ρ : Type) := -(info : Thunk ParserInfo := Thunk.mk (λ _, {})) -(fn : ρ) - -abbrev Parser := AbsParser ParserFn - -class ParserFnLift (ρ : Type) := -(lift {} : ParserFn → ρ) -(map : (ParserFn → ParserFn) → ρ → ρ) -(map₂ : (ParserFn → ParserFn → ParserFn) → ρ → ρ → ρ) - -instance parserLiftInhabited {ρ : Type} [ParserFnLift ρ] : Inhabited ρ := -⟨ParserFnLift.lift (default _)⟩ - -instance idParserLift : ParserFnLift ParserFn := -{ lift := λ p, p, - map := λ m p, m p, - map₂ := λ m p1 p2, m p1 p2 } - -@[inline] -def liftParser {ρ : Type} [ParserFnLift ρ] (info : Thunk ParserInfo) (fn : ParserFn) : AbsParser ρ := -{ info := info, fn := ParserFnLift.lift fn } - -@[inline] -def mapParser {ρ : Type} [ParserFnLift ρ] (infoFn : ParserInfo → ParserInfo) (pFn : ParserFn → ParserFn) : AbsParser ρ → AbsParser ρ := -λ p, { info := Thunk.mk (λ _, infoFn p.info.get), fn := ParserFnLift.map pFn p.fn } - -@[inline] -def mapParser₂ {ρ : Type} [ParserFnLift ρ] (infoFn : ParserInfo → ParserInfo → ParserInfo) (pFn : ParserFn → ParserFn → ParserFn) - : AbsParser ρ → AbsParser ρ → AbsParser ρ := -λ p q, { info := Thunk.mk (λ _, infoFn p.info.get q.info.get), fn := ParserFnLift.map₂ pFn p.fn q.fn } - -def EnvParserFn (α : Type) (ρ : Type) := -α → ρ - -def RecParserFn (α ρ : Type) := -EnvParserFn (α → ρ) ρ - -instance envParserLift (α ρ : Type) [ParserFnLift ρ] : ParserFnLift (EnvParserFn α ρ) := -{ lift := λ p a, ParserFnLift.lift p, - map := λ m p a, ParserFnLift.map m (p a), - map₂ := λ m p1 p2 a, ParserFnLift.map₂ m (p1 a) (p2 a) } - -instance recParserLift (α ρ : Type) [ParserFnLift ρ] : ParserFnLift (RecParserFn α ρ) := -inferInstanceAs (ParserFnLift (EnvParserFn (α → ρ) ρ)) - -namespace RecParserFn -variables {α ρ : Type} - -@[inline] def recurse (a : α) : RecParserFn α ρ := -λ p, p a - -@[inline] def run [ParserFnLift ρ] (x : RecParserFn α ρ) (rec : α → RecParserFn α ρ) : ρ := -x (fix (λ f a, rec a f)) - -end RecParserFn - -@[inline] def andthen {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ → AbsParser ρ := -mapParser₂ andthenInfo andthenFn - -@[inline] def node {ρ : Type} [ParserFnLift ρ] (k : SyntaxNodeKind) : AbsParser ρ → AbsParser ρ := -mapParser nodeInfo (nodeFn k) - -@[inline] def orelse {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ → AbsParser ρ := -mapParser₂ orelseInfo orelseFn - -@[inline] def try {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ := -mapParser noFirstTokenInfo tryFn - -@[inline] def many {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ := -mapParser noFirstTokenInfo manyFn - -@[inline] def optional {ρ : Type} [ParserFnLift ρ] : AbsParser ρ → AbsParser ρ := -mapParser noFirstTokenInfo optionalFn - -@[inline] def many1 {ρ : Type} [ParserFnLift ρ] (p : AbsParser ρ) : AbsParser ρ := -andthen p (many p) - -abbrev BasicParserFn : Type := EnvParserFn ParserConfig ParserFn -abbrev BasicParser : Type := AbsParser BasicParserFn -abbrev CmdParserFn (ρ : Type) : Type := EnvParserFn ρ (RecParserFn Unit ParserFn) -abbrev TermParserFn : Type := RecParserFn Nat (CmdParserFn ParserConfig) -abbrev TermParser : Type := AbsParser TermParserFn -abbrev TrailingTermParser : Type := AbsParser (EnvParserFn Syntax TermParserFn) - -def TokenMap (α : Type) := RBMap Name (List α) Name.quickLt - -structure CommandParserConfig extends ParserConfig := -(leadingTermParsers : TokenMap TermParser) -(trailingTermParsers : TokenMap TrailingTermParser) --- local Term parsers (such as from `local notation`) hide previous parsers instead of overloading them -(localLeadingTermParsers : TokenMap TermParser := mkRBMap _ _ _) -(localTrailingTermParsers : TokenMap TrailingTermParser := mkRBMap _ _ _) - -abbrev CommandParser : Type := AbsParser (CmdParserFn CommandParserConfig) - -@[inline] def Term.parser (rbp : Nat := 0) : TermParser := -{ fn := RecParserFn.recurse rbp } - -@[inline] def Command.parser : CommandParser := -{ fn := λ _, RecParserFn.recurse () } - -@[inline] def basicParser2TermParser (p : BasicParser) : TermParser := -{ info := Thunk.mk (λ _, p.info.get), fn := λ _ cfg _, p.fn cfg } - -instance basic2term : HasCoe BasicParser TermParser := -⟨basicParser2TermParser⟩ - -@[inline] def basicParser2CmdParser (p : BasicParser) : CommandParser := -{ info := Thunk.mk (λ _, p.info.get), fn := λ cfg _, p.fn cfg.toParserConfig } - -instance basicmd : HasCoe BasicParser CommandParser := -⟨basicParser2CmdParser⟩ - -private def tokenFnAux : BasicParserFn -| cfg s d := - let i := d.pos in - let c := s.get i in - if c == '\"' then - strLitFnAux i s (d.next s i) - else if c.isDigit then - numberFnAux s d - else - let (_, tk) := cfg.tokens.matchPrefix s i in - identFnAux i tk Name.anonymous s d - -private def updateCache (startPos : Nat) (d : ParserData) : ParserData := -match d with -| ⟨stack, pos, cache, none⟩ := - if stack.size == 0 then d - else - let tk := stack.back in - ⟨stack, pos, { tokenCache := some { startPos := startPos, stopPos := pos, token := tk } }, none⟩ -| other := other - -def tokenFn : BasicParserFn -| cfg s d := - let i := d.pos in - if s.atEnd i then d.mkEOIError - else - match d.cache with - | { tokenCache := some tkc } := - if tkc.startPos == i then - let d := d.pushSyntax tkc.token in - d.setPos tkc.stopPos - else - let d := tokenFnAux cfg s d in - updateCache i d - | _ := - let d := tokenFnAux cfg s d in - updateCache i d - -def symbolFnAux (sym : String) (errorMsg : String) : BasicParserFn -| cfg s d := - let d := tokenFn cfg s d in - if d.hasError then - d.mkError errorMsg - else - match d.stxStack.back with - | Syntax.atom _ sym' := if sym == sym' then d else d.mkError errorMsg - | _ := d.mkError errorMsg - -@[inline] def symbolFn (sym : String) : BasicParserFn := -symbolFnAux sym ("expected '" ++ sym ++ "'") - -def symbolInfo (sym : String) (lbp : Nat) : ParserInfo := -{ updateTokens := λ trie, trie.insert sym { val := sym, lbp := lbp }, - firstToken := some { val := sym, lbp := lbp } } - -@[inline] def symbol (sym : String) (lbp : Nat := 0) : BasicParser := -{ info := Thunk.mk (λ _, symbolInfo sym lbp), - fn := symbolFn sym } - -def numberFn : BasicParserFn -| cfg s d := - let d := tokenFn cfg s d in - if d.hasError || !(d.stxStack.back.isOfKind numberKind) then - d.mkError "expected numeral" - else - d - -@[inline] def number : BasicParser := -{ fn := numberFn } - -def strLitFn : BasicParserFn -| cfg s d := - let d := tokenFn cfg s d in - if d.hasError || !(d.stxStack.back.isOfKind strLitKind) then - d.mkError "expected string literal" - else - d - -@[inline] def strLit : BasicParser := -{ fn := numberFn } - -def mkFrontendConfig (filename input : String) : FrontendConfig := -{ filename := filename, - input := input, - fileMap := input.toFileMap } - -def BasicParser.run (p : BasicParser) (input : String) (filename : String := "") : Except String Syntax := -let frontendCfg := mkFrontendConfig filename input in -let tokens := p.info.get.updateTokens {} in -let cfg : ParserConfig := { tokens := tokens, .. frontendCfg } in -let d : ParserData := { stxStack := Array.empty, pos := 0, cache := {}, errorMsg := none } in -let d := p.fn cfg input d in -if d.hasError then - Except.error (d.toErrorMsg cfg) -else - Except.ok d.stxStack.back - - -local infix ` ; `:10 := Parser.andthen -local infix ` || `:5 := Parser.orelse - -def mkTestKind : IO SyntaxNodeKind := nextKind `test -@[init mkTestKind] constant testKind : SyntaxNodeKind := default _ - -set_option pp.implicit true -set_option pp.binder_types false -set_option trace.compiler.stage2 true -set_option trace.compiler.boxed true --- set_option trace.compiler.stage1 true --- set_option trace.compiler.lcnf true --- set_option trace.compiler.lcnf true --- set_option trace.compiler.simp true - -@[inline2] -def p0 : BasicParser := -node testKind (symbol "foo"; many (symbol "boo")) - -@[inline2] -def p1 (s : String) : TermParser := -symbol "hello"; symbol "world"; symbol "boo" -|| -symbol s -|| -symbol "opt3"; symbol "boo" - -@[inline2] -def p2 (s : String) : TermParser := --- symbol "boo"; p1; p1; p0 -p1 "hello"; p0; p1 s - -@[inline2] -def p3 (s : String) : TermParser := -p1 s -|| -p2 s -|| -symbol "boo"; p2 s - -@[inline2] -def p4 (s : String) : CommandParser := -symbol s; symbol "boo" - -end Parser --- end Lean diff --git a/tests/playground/parser/syntax.lean b/tests/playground/parser/syntax.lean index 3df99ae3b1..58796799ac 100644 --- a/tests/playground/parser/syntax.lean +++ b/tests/playground/parser/syntax.lean @@ -236,6 +236,25 @@ partial def reprint : Syntax → Option String else args.mfoldl "" (λ stx r, do s ← reprint stx, pure $ r ++ s) | missing := "" +open Lean.Format + +protected partial def toFormat : Syntax → Format +| (atom info val) := toFmt $ repr val +| (ident _ _ val pre scopes) := + let scopes := pre.map toFmt ++ scopes.reverse.map toFmt in + let scopes := match scopes with [] := toFmt "" | _ := bracket "{" (joinSep scopes ", ") "}" in + toFmt "`" ++ toFmt val ++ scopes +| (node kind args scopes) := + let scopes := match scopes with [] := toFmt "" | _ := bracket "{" (joinSep scopes.reverse ", ") "}" in + if kind.name = `Lean.Parser.noKind then + sbracket $ scopes ++ joinSep (args.toList.map toFormat) line + else + let shorterName := kind.name.replacePrefix `Lean.Parser Name.anonymous in + paren $ joinSep ((toFmt shorterName ++ scopes) :: args.toList.map toFormat) line +| missing := "" + +instance : HasToFormat Syntax := ⟨Syntax.toFormat⟩ +instance : HasToString Syntax := ⟨toString ∘ toFmt⟩ end Syntax end Parser diff --git a/tests/playground/parser/test1.lean b/tests/playground/parser/test1.lean index e2a866a44f..b746cfdf34 100644 --- a/tests/playground/parser/test1.lean +++ b/tests/playground/parser/test1.lean @@ -1,59 +1,28 @@ import parser +open Parser +local infix ` ; `:10 := Parser.andthen +local infix ` || `:5 := Parser.orelse -abbrev Parser (α : Type) := ReaderT Nat (Lean.Parser.ParserM Unit Unit Unit) α +def mkTestKind : IO SyntaxNodeKind := nextKind `test +@[init mkTestKind] constant testKind : SyntaxNodeKind := default _ -open Lean.Parser +@[inline2] +def numPair : BasicParser := +node testKind $ + symbol "("; number; symbol ","; number; symbol ")" --- setOption pp.implicit True --- setOption Trace.Compiler.boxed True +def numPairs : BasicParser := +many numPair -def testP : Parser Unit := -many1 (str "++" <|> str "**" <|> (str "--" *> takeUntil (= '\n') *> any *> pure ())) +@[noinline] def test (p : BasicParser) (s : String) : IO Unit := +match p.run s with +| Except.error msg := IO.println "error" +| Except.ok stx := IO.println "ok" -def testP2 : Parser Unit := -many1 ((strLit *> whitespace *> pure ()) <|> (str "--" *> takeUntil (= '\n') *> any *> pure ())) - -def testP3 : Parser Unit := -leanWhitespace - -def testParser (x : Parser Unit) (input : String) : String := -match (x 0).run () () input with -| Lean.Parser.Result.ok _ i _ _ := "Ok at " ++ toString i -| Result.error msg i _ _ := "Error at " ++ toString i ++ ": " ++ msg - -def IO.testParser {α : Type} [HasToString α] (x : Parser α) (input : String) : IO Unit := -match (x 0).run () () input with -| Lean.Parser.Result.ok a _ _ _ := IO.println ("Ok " ++ toString a) -| _ := throw "ERROR" - -@[noinline] def test (p : Parser Unit) (s : String) : IO Unit := -IO.println (testParser p s) - -def mkBigString : Nat → String → String +def mkNumPairString : Nat → String → String | 0 s := s -| (n+1) s := mkBigString n (s ++ "-- new comment\n") - -def mkBigString2 : Nat → String → String -| 0 s := s -| (n+1) s := mkBigString2 n (s ++ "\"hello\\nworld\"\n-- comment\n") - -def mkBigString3 : Nat → String → String -| 0 s := s -| (n+1) s := mkBigString3 n (s ++ "/- /- comment 1 -/ -/ \n -- comment 2 \n \t \n ") - -def prof {α : Type} (msg : String) (p : IO α) : IO α := -let msg₁ := "Time for '" ++ msg ++ "':" in -let msg₂ := "Memory usage for '" ++ msg ++ "':" in -allocprof msg₂ (timeit msg₁ p) +| (n+1) s := mkNumPairString n (s ++ "(" ++ toString n ++ ", " ++ toString n ++ ") -- comment\n") def main (xs : List String) : IO Unit := -do -let s₁ := mkBigString xs.head.toNat "", -let s₂ := s₁ ++ "bad" ++ mkBigString 20 "", -let s₃ := mkBigString2 xs.head.toNat "", -let s₄ := mkBigString3 xs.head.toNat "", -IO.println s₄.length, -prof "Parser 1" (test testP s₁), -prof "Parser 2" (test testP s₂), -prof "Parser 3" (test testP2 s₃), -prof "Parser 4" (test testP3 s₄) +let s := mkNumPairString xs.head.toNat "" in +test numPairs s