namespace Lean namespace Parser abbrev Pos := String.Pos /- σ is the non-backtrackable State δ is the backtrackable State μ is the extra error Message data -/ inductive Result (σ δ μ α : Type) | ok {} (a : α) (i : Pos) (st : σ) (bst : δ) : Result | error {} (msg : String) (i : Pos) (st : σ) (extra : Option μ) : Result inductive Result.IsOk {σ δ μ α : Type} : Result σ δ μ α → Prop | mk (a : α) (i : Pos) (st : σ) (bst : δ) : Result.IsOk (Result.ok a i st bst) theorem errorIsNotOk {σ δ μ α : Type} {msg : String} {i : Pos} {st : σ} {extra : Option μ} (h : Result.IsOk (@Result.error σ δ μ α msg i st extra)) : False := match h with end @[inline] def unreachableError {σ δ μ α β : Type} {msg : String} {i : Pos} {st : σ} {extra : Option μ} (h : Result.IsOk (@Result.error σ δ μ α msg i st extra)) : β := False.elim (errorIsNotOk h) def input (σ δ μ : Type) : Type := { r : Result σ δ μ Unit // r.IsOk } @[inline] def mkInput {σ δ μ : Type} (i : Pos) (st : σ) (bst : δ) : input σ δ μ := ⟨Result.ok () i st bst, Result.IsOk.mk _ _ _ _⟩ def ParserM (σ δ μ α : Type) := String → input σ δ μ → Result σ δ μ α variables {σ δ μ α β : Type} namespace ParserM protected def default : ParserM σ δ μ α := λ _ inp, match inp with | ⟨Result.ok _ i st bst, h⟩ := Result.error "" i st none | ⟨Result.error _ _ _ _, h⟩ := unreachableError h instance : Inhabited (ParserM σ δ μ α) := ⟨ParserM.default⟩ @[inline] def run (p : ParserM σ δ μ α) (st : σ) (bst : δ) (s : String) : Result σ δ μ α := p s (mkInput 0 st bst) @[inline] def pure (a : α) : ParserM σ δ μ α := λ _ inp, match inp with | ⟨Result.ok _ it st bst, h⟩ := Result.ok a it st bst | ⟨Result.error _ _ _ _, h⟩ := unreachableError h @[inline] def bind (x : ParserM σ δ μ α) (f : α → ParserM σ δ μ β) : ParserM σ δ μ β := λ str inp, match x str inp with | Result.ok a i st bst := f a str (mkInput i st bst) | Result.error msg i st etx := Result.error msg i st etx instance isMonad : Monad (ParserM σ δ μ) := {pure := @ParserM.pure _ _ _, bind := @ParserM.bind _ _ _} def mkError (r : input σ δ μ) (msg : String) : Result σ δ μ α := match r with | ⟨Result.ok _ i st _, _⟩ := Result.error msg i st none | ⟨Result.error _ _ _ _, h⟩ := unreachableError h @[inline] def orelse (p q : ParserM σ δ μ α) : ParserM σ δ μ α := λ str inp, match inp with | ⟨Result.ok _ i₁ _ bst₁, _⟩ := (match p str inp with | err@(Result.error _ i₂ st₂ _) := if i₁ == i₂ then q str (mkInput i₁ st₂ bst₁) else err | other := other) | ⟨Result.error _ _ _ _, h⟩ := unreachableError h @[inline] def failure : ParserM σ δ μ α := λ _ inp, mkError inp "failure" instance : Alternative (ParserM σ δ μ) := { orelse := @ParserM.orelse _ _ _, failure := @ParserM.failure _ _ _, .. ParserM.isMonad } @[inline] def currPos : input σ δ μ → Pos | ⟨Result.ok _ i _ _, _⟩ := i | ⟨Result.error _ _ _ _, h⟩ := unreachableError h @[inline] def try {α : Type} (p : ParserM σ δ μ α) : ParserM σ δ μ α := λ str inp, match inp with | ⟨Result.ok _ i _ _, _⟩ := (match p str inp with | Result.error msg _ st x := Result.error msg i st x | other := other) | ⟨Result.error _ _ _ _, h⟩ := unreachableError h @[inline] def lookahead (p : ParserM σ δ μ α) : ParserM σ δ μ α := λ str inp, match inp with | ⟨Result.ok _ i _ bst, _⟩ := (match p str inp with | Result.ok a _ st _ := Result.ok a i st bst | other := other) | ⟨Result.error _ _ _ _, h⟩ := unreachableError h @[specialize] def satisfy (p : Char → Bool) (errorMsg := "unexpected character") : ParserM σ δ μ Char := λ str inp, match inp with | ⟨Result.ok _ i st bst, _⟩ := if str.atEnd i then mkError inp "end of input" else let c := str.get i in if p c then Result.ok c (str.next i) st bst else mkError inp errorMsg | ⟨Result.error _ _ _ _, h⟩ := unreachableError h @[inline] def ch (c : Char) : ParserM σ δ μ Char := satisfy (== c) ("expected " ++ repr c) @[inline] def any : ParserM σ δ μ Char := satisfy (λ _, true) @[specialize] partial def takeUntilAux (p : Char → Bool) : ParserM σ δ μ Unit | str inp := match inp with | ⟨Result.ok _ i st bst, _⟩ := if str.atEnd i then inp.val else let c := str.get i in if p c then inp.val else takeUntilAux str (mkInput (str.next i) st bst) | ⟨Result.error _ _ _ _, h⟩ := unreachableError h @[inline] def takeUntil (p : Char → Bool) : ParserM σ δ μ Unit := λ str inp, takeUntilAux p str inp partial def strAux (s : String) (errorMsg : String) (initPos : Pos) : Pos → ParserM σ δ μ Unit | j str inp := if s.atEnd j then inp.val else match inp with | ⟨Result.ok _ i st bst, _⟩ := if str.atEnd i then Result.error errorMsg initPos st none else if s.get j == str.get i then strAux (s.next j) str (mkInput (str.next i) st bst) else Result.error errorMsg initPos st none | ⟨Result.error _ _ _ _, h⟩ := unreachableError h @[inline] def str (s : String) : ParserM σ δ μ Unit := λ str inp, let initPos := currPos inp in strAux s ("expected " ++ repr s) initPos 0 str inp @[specialize] partial def manyAux (a : α) (p : ParserM σ δ μ α) : ParserM σ δ μ α | str inp := match inp with | ⟨Result.ok _ i₀ _ bst₀, _⟩ := (match p str inp with | Result.ok _ i st bst := manyAux str (mkInput i st bst) | Result.error _ _ st _ := Result.ok a i₀ st bst₀) | ⟨Result.error _ _ _ _, h⟩ := unreachableError h @[inline] def many (p : ParserM σ δ μ Unit) : ParserM σ δ μ Unit := manyAux () p @[inline] def many1 (p : ParserM σ δ μ Unit) : ParserM σ δ μ Unit := p *> many p def pos : ParserM σ δ μ Pos := λ str inp, match inp with | ⟨Result.ok _ i st bst, _⟩ := Result.ok i i st bst | ⟨Result.error _ _ _ _, h⟩ := unreachableError h def error {α : Type} (msg : String) : ParserM σ δ μ α := λ _ inp, mkError inp msg def errorAt {α : Type} (pos : Pos) (msg : String) : ParserM σ δ μ α := λ _ inp, match inp with | ⟨Result.ok _ _ st _, _⟩ := Result.error msg pos st none | ⟨Result.error _ _ _ _, h⟩ := unreachableError h def hexDigit : ParserM σ δ μ Nat | str inp := match inp with | ⟨Result.ok _ i st bst, _⟩ := if str.atEnd i then mkError inp "unexpected end of input" else let c := str.get i in let i := str.next i in if c.isDigit then Result.ok (c.toNat - '0'.toNat) i st bst else if 'a' <= c && c <= 'f' then Result.ok (10 + c.toNat - 'a'.toNat) i st bst else if 'A' <= c && c <= 'F' then Result.ok (10 + c.toNat - 'A'.toNat) i st bst else mkError inp "invalid hexadecimal numeral, hexadecimal digit expected" | ⟨Result.error _ _ _ _, h⟩ := unreachableError h def quotedChar : ParserM σ δ μ Char := do p ← pos, c ← any, if c = '\\' then pure '\\' else if c = '\"' then pure '\"' else if c = '\'' then pure '\'' else if c = 'n' then pure '\n' else if c = 't' then pure '\t' else if c = 'x' then do { d₁ ← hexDigit, d₂ ← hexDigit, pure $ Char.ofNat (16*d₁ + d₂) } else if c = 'u' then do { d₁ ← hexDigit, d₂ ← hexDigit, d₃ ← hexDigit, d₄ ← hexDigit, pure $ Char.ofNat (16*(16*(16*d₁ + d₂) + d₃) + d₄) } else errorAt p "invalid escape sequence" partial def strLitAux : String → ParserM σ δ μ String | out := do c ← any, if c == '\"' then pure out else if c == '\\' then do c ← quotedChar, strLitAux (out.push c) else strLitAux (out.push c) def strLit : ParserM σ δ μ String := satisfy (== '\"') "expected string literal" *> strLitAux "" partial def finishCommentBlock : Nat → ParserM σ δ μ Unit | nesting str inp := match inp with | ⟨Result.ok _ i st bst, _⟩ := if str.atEnd i then mkError inp "end of input" else let c := str.get i in let i := str.next i in if c == '-' then if str.atEnd i then mkError inp "end of input" else let c := str.get i in if c == '/' then -- "-/" end of comment if nesting == 1 then Result.ok () (str.next i) st bst else finishCommentBlock (nesting-1) str (mkInput (str.next i) st bst) else finishCommentBlock nesting str (mkInput i st bst) else if c == '/' then if str.atEnd i then mkError inp "end of input" else let c := str.get i in if c == '-' then finishCommentBlock (nesting+1) str (mkInput (str.next i) st bst) else finishCommentBlock nesting str (mkInput i st bst) else finishCommentBlock nesting str (mkInput i st bst) | ⟨Result.error _ _ _ _, h⟩ := unreachableError h partial def leanWhitespace : ParserM σ δ μ Unit | str inp := match inp with | ⟨Result.ok _ i st bst, _⟩ := if str.atEnd i then inp.val else let c := str.get i in if c.isWhitespace then leanWhitespace str (mkInput (str.next i) st bst) else if c == '-' then let i := str.next i in let c := str.get i in if c == '-' then ((takeUntil (= '\n')) *> leanWhitespace) str (mkInput i st bst) else inp.val else if c == '/' then let i := str.next i in let c := str.get i in if c == '-' then let i := str.next i in let c := str.get i in if c == '-' then inp.val -- "/--" doc comment is an actual token else ((finishCommentBlock 1) *> leanWhitespace) str (mkInput i st bst) else inp.val else inp.val | ⟨Result.error _ _ _ _, h⟩ := unreachableError h end ParserM class monadParser (σ : outParam Type) (δ : outParam Type) (μ : outParam Type) (m : Type → Type) := (lift {} {α : Type} : ParserM σ δ μ α → m α) (map {} {α : Type} : (Π β, ParserM σ δ μ β → ParserM σ δ μ β) → m α → m α) instance monadParserBase : monadParser σ δ μ (ParserM σ δ μ) := { lift := λ α, id, map := λ α f x, f α x } instance monadParserTrans {m n : Type → Type} [HasMonadLift m n] [MonadFunctor m m n n] [monadParser σ δ μ m] : monadParser σ δ μ n := { lift := λ α p, monadLift (monadParser.lift p : m α), map := λ α f x, monadMap (λ β x, (monadParser.map @f x : m β)) x } class monadParserAux (σ : outParam Type) (δ : outParam Type) (μ : outParam Type) (m : Type → Type) := (map {} {α : Type} : (ParserM σ δ μ α → ParserM σ δ μ α) → m α → m α) instance monadParserAuxBase : monadParserAux σ δ μ (ParserM σ δ μ) := { map := λ α, id } instance monadParserAuxReader {m : Type → Type} {ρ : Type} [Monad m] [monadParserAux σ δ μ m] : monadParserAux σ δ μ (ReaderT ρ m) := { map := λ α f x r, (monadParserAux.map f : m α → m α) (x r) } section variables {m : Type → Type} [monadParser σ δ μ m] @[inline] def satisfy (p : Char → Bool) : m Char := monadParser.lift (ParserM.satisfy p) def ch (c : Char) : m Char := monadParser.lift (ParserM.ch c) def alpha : m Char := satisfy Char.isAlpha def digit : m Char := satisfy Char.isDigit def upper : m Char := satisfy Char.isUpper def lower : m Char := satisfy Char.isLower def any : m Char := satisfy (λ _, true) @[inline] def takeUntil (p : Char → Bool) : m Unit := monadParser.lift (ParserM.takeUntil p) @[inline] def str (s : String) : m Unit := monadParser.lift (ParserM.str s) @[inline] def try (p : m α) : m α := monadParser.map (λ _ p, ParserM.try p) p @[inline] def lookahead (p : m α) : m α := monadParser.map (λ _ p, ParserM.lookahead p) p @[inline] def takeWhile (p : Char → Bool) : m Unit := takeUntil (λ c, !p c) @[inline] def whitespace : m Unit := takeWhile Char.isWhitespace @[inline] def strLit : m String := monadParser.lift (ParserM.strLit) @[inline] def leanWhitespace : m Unit := monadParser.lift ParserM.leanWhitespace end section variables {m : Type → Type} [monadParserAux σ δ μ m] @[inline] def many (p : m Unit) : m Unit := monadParserAux.map ParserM.many p @[inline] def many1 (p : m Unit) : m Unit := monadParserAux.map ParserM.many1 p end end Parser end Lean abbrev Parser (α : Type) := ReaderT Nat (Lean.Parser.ParserM Unit Unit Unit) α open Lean.Parser -- setOption pp.implicit True -- setOption Trace.Compiler.boxed True def testP : Parser Unit := many1 (str "++" <|> str "**" <|> (str "--" *> takeUntil (= '\n') *> any *> pure ())) def testP2 : Parser Unit := many1 ((strLit *> whitespace *> pure ()) <|> (str "--" *> takeUntil (= '\n') *> any *> pure ())) def testP3 : Parser Unit := leanWhitespace def testParser (x : Parser Unit) (input : String) : String := match (x 0).run () () input with | Lean.Parser.Result.ok _ i _ _ := "Ok at " ++ toString i | Result.error msg i _ _ := "Error at " ++ toString i ++ ": " ++ msg def IO.testParser {α : Type} [HasToString α] (x : Parser α) (input : String) : IO Unit := match (x 0).run () () input with | Lean.Parser.Result.ok a _ _ _ := IO.println ("Ok " ++ toString a) | _ := throw "ERROR" @[noinline] def test (p : Parser Unit) (s : String) : IO Unit := IO.println (testParser p s) def mkBigString : Nat → String → String | 0 s := s | (n+1) s := mkBigString n (s ++ "-- new comment\n") def mkBigString2 : Nat → String → String | 0 s := s | (n+1) s := mkBigString2 n (s ++ "\"hello\\nworld\"\n-- comment\n") def mkBigString3 : Nat → String → String | 0 s := s | (n+1) s := mkBigString3 n (s ++ "/- /- comment 1 -/ -/ \n -- comment 2 \n \t \n ") def prof {α : Type} (msg : String) (p : IO α) : IO α := let msg₁ := "Time for '" ++ msg ++ "':" in let msg₂ := "Memory usage for '" ++ msg ++ "':" in allocprof msg₂ (timeit msg₁ p) def tst1 : IO Unit := IO.testParser strLit "\"hello\n\"" def check {α} [HasBeq α] (p : Parser α) (s : String) (e : α) : IO Unit := match (p 0).run () () s with | Lean.Parser.Result.ok v i _ _ := do IO.println ("Ok at " ++ toString i), unless (v == e) (throw "unexpected result") | Result.error msg _ _ _ := throw msg def checkFailure {α} (p : Parser α) (s : String) : IO Unit := match (p 0).run () () s with | Lean.Parser.Result.ok _ i _ _ := throw "Worked" | Result.error msg i _ _ := IO.println ("failed as expected at " ++ toString i ++ ", error: " ++ toString msg) def str' (s : String) : Parser String := str s *> pure s def tst2 : IO Unit := do check (ch 'a') "a" 'a', check any "a" 'a', check any "b" 'b', check (str' "foo" <|> str' "bla" <|> str' "boo") "bla" "bla", check (try (str' "foo" *> str' "foo") <|> str' "foo2" <|> str' "boo") "foo2" "foo2", checkFailure ((str' "foo" *> str' "abc") <|> str' "foo2" <|> str' "boo") "foo2", check (str' "foofoo" <|> str' "foo2" <|> str' "boo") "foo2" "foo2", check (leanWhitespace *> str' "hello") " \n-- comment\nhello" "hello", check (takeUntil (== '\n') *> ch '\n' *> str' "test") "\ntest" "test", check (takeUntil (== 't') *> str' "test") "test" "test", check (takeUntil (== '\n') *> ch '\n' *> str' "test") "abc\ntest" "test", check (try (ch 'a' *> ch 'b') <|> (ch 'a' *> ch 'c')) "ac" 'c', checkFailure ((ch 'a' *> ch 'b') <|> (ch 'a' *> ch 'c')) "ac", check (lookahead (ch 'a')) "abc" 'a', check (lookahead (ch 'a') *> str' "abc") "abc" "abc", check strLit "\"abc\\nd\"" "abc\nd", checkFailure strLit "abc\\nd\"", checkFailure strLit "\"abc", checkFailure strLit "\"abc\\ab̈\"" def main (xs : List String) : IO Unit := do tst1, tst2, let s₁ := mkBigString xs.head.toNat "", let s₂ := s₁ ++ "bad" ++ mkBigString 20 "", let s₃ := mkBigString2 xs.head.toNat "", let s₄ := mkBigString3 xs.head.toNat "", IO.println s₄.length, prof "Parser 1" (test testP s₁), prof "Parser 2" (test testP s₂), prof "Parser 3" (test testP2 s₃), prof "Parser 4" (test testP3 s₄)