255 lines
9.9 KiB
Text
255 lines
9.9 KiB
Text
namespace lean
|
||
namespace parser
|
||
|
||
abbrev pos := string.utf8_pos
|
||
|
||
/-
|
||
σ is the non-backtrackable state
|
||
δ is the backtrackable state
|
||
μ is the extra error message data
|
||
-/
|
||
inductive result (σ δ μ α : Type)
|
||
| ok {} (a : α) (i : pos) (st : σ) (bst : δ) (eps : bool) : result
|
||
| error {} (msg : string) (i : pos) (st : σ) (extra : option μ) (eps : bool) : result
|
||
|
||
inductive result.is_ok {σ δ μ α : Type} : result σ δ μ α → Prop
|
||
| mk (a : α) (i : pos) (st : σ) (bst : δ) (eps : bool) : result.is_ok (result.ok a i st bst eps)
|
||
|
||
theorem error_is_not_ok {σ δ μ α : Type} {msg : string} {i : pos} {st : σ} {extra : option μ} {eps : bool}
|
||
(h : result.is_ok (@result.error σ δ μ α msg i st extra eps)) : false :=
|
||
match h with end
|
||
|
||
@[inline] def unreachable_error {σ δ μ α β : Type} {msg : string} {i : pos} {st : σ} {extra : option μ} {eps : bool}
|
||
(h : result.is_ok (@result.error σ δ μ α msg i st extra eps)) : β :=
|
||
false.elim (error_is_not_ok h)
|
||
|
||
def input (σ δ μ : Type) : Type := { r : result σ δ μ unit // r.is_ok }
|
||
|
||
@[inline] def mk_input {σ δ μ : Type} (i : pos) (st : σ) (bst : δ) (eps := tt) : input σ δ μ :=
|
||
⟨result.ok () i st bst eps, result.is_ok.mk _ _ _ _ _⟩
|
||
|
||
def parser_m (σ δ μ α : Type) :=
|
||
string → input σ δ μ → result σ δ μ α
|
||
|
||
variables {σ δ μ α β : Type}
|
||
|
||
@[inline] def parser_m.run (p : parser_m σ δ μ α) (st : σ) (bst : δ) (s : string) : result σ δ μ α :=
|
||
p s (mk_input 0 st bst)
|
||
|
||
@[inline] def parser_m.pure (a : α) : parser_m σ δ μ α :=
|
||
λ _ inp,
|
||
match inp with
|
||
| ⟨result.ok _ it st bst _, h⟩ := result.ok a it st bst tt
|
||
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
|
||
|
||
@[inline_if_reduce] def strict_or (b₁ b₂ : bool) := b₁ || b₂
|
||
@[inline_if_reduce] def strict_and (b₁ b₂ : bool) := b₁ && b₂
|
||
|
||
@[inline] def parser_m.bind (x : parser_m σ δ μ α) (f : α → parser_m σ δ μ β) : parser_m σ δ μ β :=
|
||
λ str inp,
|
||
match x str inp with
|
||
| result.ok a i st bst e₁ :=
|
||
(match f a str (mk_input i st bst) with
|
||
| result.ok b i st bst e₂ := result.ok b i st bst (strict_and e₁ e₂)
|
||
| result.error msg i st ext e₂ := result.error msg i st ext (strict_and e₁ e₂))
|
||
| result.error msg i st etx e := result.error msg i st etx e
|
||
|
||
instance parser_m_is_monad : monad (parser_m σ δ μ) :=
|
||
{pure := @parser_m.pure _ _ _, bind := @parser_m.bind _ _ _}
|
||
|
||
def mk_error (r : input σ δ μ) (msg : string) (eps := tt) : result σ δ μ α :=
|
||
match r with
|
||
| ⟨result.ok _ i c s _, _⟩ := result.error msg i c none eps
|
||
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
|
||
|
||
@[inline] def parser_m.orelse (p q : parser_m σ δ μ α) : parser_m σ δ μ α :=
|
||
λ str inp,
|
||
match inp with
|
||
| ⟨result.ok _ i₁ _ bst₁ _, _⟩ :=
|
||
(match p str inp with
|
||
| result.error _ _ st₂ _ tt := q str (mk_input i₁ st₂ bst₁)
|
||
| other := other)
|
||
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
|
||
|
||
@[inline] def parser_m.failure : parser_m σ δ μ α :=
|
||
λ _ inp, mk_error inp "failure"
|
||
|
||
instance : alternative (parser_m σ δ μ) :=
|
||
{ orelse := @parser_m.orelse _ _ _,
|
||
failure := @parser_m.failure _ _ _,
|
||
.. parser.parser_m_is_monad }
|
||
|
||
def set_silent_error : result σ δ μ α → result σ δ μ α
|
||
| (result.error msg i st ext _) := result.error msg i st ext tt
|
||
| other := other
|
||
|
||
@[inline] def curr (str : string) (i : pos) : char := str.utf8_get i
|
||
@[inline] def next (str : string) (i : pos) : pos := str.utf8_next i
|
||
@[inline] def at_end (str : string) (i : pos) : bool := str.utf8_at_end i
|
||
|
||
@[inline] def curr_pos : input σ δ μ → pos
|
||
| ⟨result.ok _ i _ _ _, _⟩ := i
|
||
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
|
||
|
||
namespace prim
|
||
@[inline] def try {α : Type} (p : parser_m σ δ μ α) : parser_m σ δ μ α :=
|
||
λ str inp, set_silent_error (p str inp)
|
||
|
||
@[inline] def lookahead (p : parser_m σ δ μ α) : parser_m σ δ μ α :=
|
||
λ str inp,
|
||
match inp with
|
||
| ⟨result.ok _ i _ bst _, _⟩ :=
|
||
(match p str inp with
|
||
| result.ok a _ st _ _ := result.ok a i st bst tt
|
||
| other := other)
|
||
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
|
||
|
||
@[specialize] def satisfy (p : char → bool) : parser_m σ δ μ char :=
|
||
λ str inp,
|
||
match inp with
|
||
| ⟨result.ok _ i st bst _, _⟩ :=
|
||
if at_end str i then mk_error inp "end of input"
|
||
else let c := curr str i in
|
||
if p c then result.ok c (next str i) st bst ff
|
||
else mk_error inp "unexpected character"
|
||
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
|
||
|
||
@[specialize] def take_until_aux (p : char → bool) : nat → parser_m σ δ μ unit
|
||
| 0 str inp := inp.val
|
||
| (n+1) str inp :=
|
||
match inp with
|
||
| ⟨result.ok _ i st bst _, _⟩ :=
|
||
if at_end str i then inp.val
|
||
else let c := curr str i in
|
||
if p c then inp.val
|
||
else take_until_aux n str (mk_input (next str i) st bst ff)
|
||
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
|
||
|
||
@[inline] def take_until (p : char → bool) : parser_m σ δ μ unit :=
|
||
λ str inp, take_until_aux p str.length str inp
|
||
|
||
def str_aux (in_s : string) (s : string) (error_msg : string) : nat → input σ δ μ → pos → result σ δ μ unit
|
||
| 0 inp j := mk_error inp error_msg
|
||
| (n+1) inp j :=
|
||
if at_end s j then inp.val
|
||
else
|
||
match inp with
|
||
| ⟨result.ok _ i st bst e, _⟩ :=
|
||
if at_end in_s i then mk_error inp error_msg
|
||
else if curr in_s i = curr s j then str_aux n (mk_input (next in_s i) st bst ff) (next s j)
|
||
else mk_error inp error_msg
|
||
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
|
||
|
||
@[inline] def str (s : string) : parser_m σ δ μ unit :=
|
||
λ in_str inp, str_aux in_str s ("expected '" ++ repr s ++ "'") in_str.length inp 0
|
||
|
||
@[specialize] def many_loop (a : α) (p : parser_m σ δ μ α) : nat → bool → parser_m σ δ μ α
|
||
| 0 fst := pure a
|
||
| (k+1) fst := λ str inp,
|
||
match inp with
|
||
| ⟨result.ok _ i₀ _ bst₀ _, _⟩ :=
|
||
(match p str inp with
|
||
| result.ok _ i st bst _ := many_loop k ff str (mk_input i st bst)
|
||
| result.error _ _ st _ _ := result.ok a i₀ st bst₀ fst)
|
||
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
|
||
|
||
-- Auxiliary function used to lift many_aux
|
||
@[inline] def many_aux (a : α) (p : parser_m σ δ μ α) : parser_m σ δ μ α :=
|
||
λ str inp, many_loop a p str.length tt str inp
|
||
|
||
@[inline] def many (p : parser_m σ δ μ unit) : parser_m σ δ μ unit :=
|
||
many_aux () p
|
||
|
||
@[inline] def many1 (p : parser_m σ δ μ unit) : parser_m σ δ μ unit :=
|
||
p *> many p
|
||
|
||
end prim
|
||
|
||
class monad_parser (σ : out_param Type) (δ : out_param Type) (μ : out_param Type) (m : Type → Type) :=
|
||
(lift {} {α : Type} : parser_m σ δ μ α → m α)
|
||
(map {} {α : Type} : (Π β, parser_m σ δ μ β → parser_m σ δ μ β) → m α → m α)
|
||
|
||
instance monad_parser_base : monad_parser σ δ μ (parser_m σ δ μ) :=
|
||
{ lift := λ α, id,
|
||
map := λ α f x, f α x }
|
||
|
||
instance monad_parser_trans {m n : Type → Type} [has_monad_lift m n] [monad_functor m m n n] [monad_parser σ δ μ m] : monad_parser σ δ μ n :=
|
||
{ lift := λ α p, monad_lift (monad_parser.lift p : m α),
|
||
map := λ α f x, monad_map (λ β x, (monad_parser.map @f x : m β)) x }
|
||
|
||
class monad_parser_aux (σ : out_param Type) (δ : out_param Type) (μ : out_param Type) (m : Type → Type) :=
|
||
(map {} {α : Type} : (parser_m σ δ μ α → parser_m σ δ μ α) → m α → m α)
|
||
|
||
instance monad_parser_aux_base : monad_parser_aux σ δ μ (parser_m σ δ μ) :=
|
||
{ map := λ α, id }
|
||
|
||
instance monad_parser_aux_reader {m : Type → Type} {ρ : Type} [monad m] [monad_parser_aux σ δ μ m] : monad_parser_aux σ δ μ (reader_t ρ m) :=
|
||
{ map := λ α f x r, (monad_parser_aux.map f : m α → m α) (x r) }
|
||
|
||
section
|
||
variables {m : Type → Type} [monad_parser σ δ μ m]
|
||
|
||
@[inline] def satisfy (p : char → bool) : m char := monad_parser.lift (prim.satisfy p)
|
||
def ch (c : char) : m char := satisfy (= c)
|
||
def alpha : m char := satisfy char.is_alpha
|
||
def digit : m char := satisfy char.is_digit
|
||
def upper : m char := satisfy char.is_upper
|
||
def lower : m char := satisfy char.is_lower
|
||
def any : m char := satisfy (λ _, true)
|
||
|
||
@[inline] def take_until (p : char → bool) : m unit := monad_parser.lift (prim.take_until p)
|
||
|
||
@[inline] def str (s : string) : m unit := monad_parser.lift (prim.str s)
|
||
|
||
@[inline] def lookahead (p : m α) : m α :=
|
||
monad_parser.map (λ β p, prim.lookahead p) p
|
||
|
||
@[inline] def take_while (p : char → bool) : m unit := take_until (λ c, !p c)
|
||
|
||
@[inline] def whitespace : m unit := take_while char.is_whitespace
|
||
|
||
end
|
||
|
||
section
|
||
variables {m : Type → Type} [monad_parser_aux σ δ μ m]
|
||
|
||
@[inline] def many (p : m unit) : m unit := monad_parser_aux.map prim.many p
|
||
@[inline] def many1 (p : m unit) : m unit := monad_parser_aux.map prim.many1 p
|
||
|
||
end
|
||
|
||
end parser
|
||
end lean
|
||
|
||
abbrev parser := reader_t nat (lean.parser.parser_m unit unit unit) unit
|
||
|
||
open lean.parser
|
||
|
||
-- set_option pp.implicit true
|
||
-- set_option trace.compiler.boxed true
|
||
|
||
def test_p : parser :=
|
||
many1 (str "++" <|> str "**" <|> (str "--" *> take_until (= '\n') *> any *> pure ()))
|
||
|
||
def test_parser (x : parser) (input : string) : string :=
|
||
match (x 0).run () () input with
|
||
| lean.parser.result.ok _ i _ _ _ := "Ok at " ++ to_string i
|
||
| result.error msg i _ _ _ := "Error at " ++ to_string i ++ ": " ++ msg
|
||
|
||
@[noinline] def test (s : string) : io unit :=
|
||
io.println (test_parser test_p s)
|
||
|
||
def mk_big_string : nat → string → string
|
||
| 0 s := s
|
||
| (n+1) s := mk_big_string n (s ++ "-- new comment\n")
|
||
|
||
def prof {α : Type} (msg : string) (p : io α) : io α :=
|
||
let msg₁ := "Time for '" ++ msg ++ "':" in
|
||
let msg₂ := "Memory usage for '" ++ msg ++ "':" in
|
||
allocprof msg₂ (timeit msg₁ p)
|
||
|
||
def main (xs : list string) : io unit :=
|
||
let s₁ := mk_big_string xs.head.to_nat "" in
|
||
let s₂ := s₁ ++ "bad" ++ mk_big_string 20 "" in
|
||
prof "parser 1" (test s₁) *>
|
||
prof "parser 2" (test s₂)
|