lean4-htt/tests/playground/parser.lean
2019-03-19 16:34:08 -07:00

255 lines
9.9 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

namespace lean
namespace parser
abbrev pos := string.utf8_pos
/-
σ is the non-backtrackable state
δ is the backtrackable state
μ is the extra error message data
-/
inductive result (σ δ μ α : Type)
| ok {} (a : α) (i : pos) (st : σ) (bst : δ) (eps : bool) : result
| error {} (msg : string) (i : pos) (st : σ) (extra : option μ) (eps : bool) : result
inductive result.is_ok {σ δ μ α : Type} : result σ δ μ α → Prop
| mk (a : α) (i : pos) (st : σ) (bst : δ) (eps : bool) : result.is_ok (result.ok a i st bst eps)
theorem error_is_not_ok {σ δ μ α : Type} {msg : string} {i : pos} {st : σ} {extra : option μ} {eps : bool}
(h : result.is_ok (@result.error σ δ μ α msg i st extra eps)) : false :=
match h with end
@[inline] def unreachable_error {σ δ μ α β : Type} {msg : string} {i : pos} {st : σ} {extra : option μ} {eps : bool}
(h : result.is_ok (@result.error σ δ μ α msg i st extra eps)) : β :=
false.elim (error_is_not_ok h)
def input (σ δ μ : Type) : Type := { r : result σ δ μ unit // r.is_ok }
@[inline] def mk_input {σ δ μ : Type} (i : pos) (st : σ) (bst : δ) (eps := tt) : input σ δ μ :=
⟨result.ok () i st bst eps, result.is_ok.mk _ _ _ _ _⟩
def parser_m (σ δ μ α : Type) :=
string → input σ δ μ → result σ δ μ α
variables {σ δ μ α β : Type}
@[inline] def parser_m.run (p : parser_m σ δ μ α) (st : σ) (bst : δ) (s : string) : result σ δ μ α :=
p s (mk_input 0 st bst)
@[inline] def parser_m.pure (a : α) : parser_m σ δ μ α :=
λ _ inp,
match inp with
| ⟨result.ok _ it st bst _, h⟩ := result.ok a it st bst tt
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
@[inline_if_reduce] def strict_or (b₁ b₂ : bool) := b₁ || b₂
@[inline_if_reduce] def strict_and (b₁ b₂ : bool) := b₁ && b₂
@[inline] def parser_m.bind (x : parser_m σ δ μ α) (f : α → parser_m σ δ μ β) : parser_m σ δ μ β :=
λ str inp,
match x str inp with
| result.ok a i st bst e₁ :=
(match f a str (mk_input i st bst) with
| result.ok b i st bst e₂ := result.ok b i st bst (strict_and e₁ e₂)
| result.error msg i st ext e₂ := result.error msg i st ext (strict_and e₁ e₂))
| result.error msg i st etx e := result.error msg i st etx e
instance parser_m_is_monad : monad (parser_m σ δ μ) :=
{pure := @parser_m.pure _ _ _, bind := @parser_m.bind _ _ _}
def mk_error (r : input σ δ μ) (msg : string) (eps := tt) : result σ δ μ α :=
match r with
| ⟨result.ok _ i c s _, _⟩ := result.error msg i c none eps
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
@[inline] def parser_m.orelse (p q : parser_m σ δ μ α) : parser_m σ δ μ α :=
λ str inp,
match inp with
| ⟨result.ok _ i₁ _ bst₁ _, _⟩ :=
(match p str inp with
| result.error _ _ st₂ _ tt := q str (mk_input i₁ st₂ bst₁)
| other := other)
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
@[inline] def parser_m.failure : parser_m σ δ μ α :=
λ _ inp, mk_error inp "failure"
instance : alternative (parser_m σ δ μ) :=
{ orelse := @parser_m.orelse _ _ _,
failure := @parser_m.failure _ _ _,
.. parser.parser_m_is_monad }
def set_silent_error : result σ δ μ α → result σ δ μ α
| (result.error msg i st ext _) := result.error msg i st ext tt
| other := other
@[inline] def curr (str : string) (i : pos) : char := str.utf8_get i
@[inline] def next (str : string) (i : pos) : pos := str.utf8_next i
@[inline] def at_end (str : string) (i : pos) : bool := str.utf8_at_end i
@[inline] def curr_pos : input σ δ μ → pos
| ⟨result.ok _ i _ _ _, _⟩ := i
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
namespace prim
@[inline] def try {α : Type} (p : parser_m σ δ μ α) : parser_m σ δ μ α :=
λ str inp, set_silent_error (p str inp)
@[inline] def lookahead (p : parser_m σ δ μ α) : parser_m σ δ μ α :=
λ str inp,
match inp with
| ⟨result.ok _ i _ bst _, _⟩ :=
(match p str inp with
| result.ok a _ st _ _ := result.ok a i st bst tt
| other := other)
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
@[specialize] def satisfy (p : char → bool) : parser_m σ δ μ char :=
λ str inp,
match inp with
| ⟨result.ok _ i st bst _, _⟩ :=
if at_end str i then mk_error inp "end of input"
else let c := curr str i in
if p c then result.ok c (next str i) st bst ff
else mk_error inp "unexpected character"
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
@[specialize] def take_until_aux (p : char → bool) : nat → parser_m σ δ μ unit
| 0 str inp := inp.val
| (n+1) str inp :=
match inp with
| ⟨result.ok _ i st bst _, _⟩ :=
if at_end str i then inp.val
else let c := curr str i in
if p c then inp.val
else take_until_aux n str (mk_input (next str i) st bst ff)
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
@[inline] def take_until (p : char → bool) : parser_m σ δ μ unit :=
λ str inp, take_until_aux p str.length str inp
def str_aux (in_s : string) (s : string) (error_msg : string) : nat → input σ δ μ → pos → result σ δ μ unit
| 0 inp j := mk_error inp error_msg
| (n+1) inp j :=
if at_end s j then inp.val
else
match inp with
| ⟨result.ok _ i st bst e, _⟩ :=
if at_end in_s i then mk_error inp error_msg
else if curr in_s i = curr s j then str_aux n (mk_input (next in_s i) st bst ff) (next s j)
else mk_error inp error_msg
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
@[inline] def str (s : string) : parser_m σ δ μ unit :=
λ in_str inp, str_aux in_str s ("expected '" ++ repr s ++ "'") in_str.length inp 0
@[specialize] def many_loop (a : α) (p : parser_m σ δ μ α) : nat → bool → parser_m σ δ μ α
| 0 fst := pure a
| (k+1) fst := λ str inp,
match inp with
| ⟨result.ok _ i₀ _ bst₀ _, _⟩ :=
(match p str inp with
| result.ok _ i st bst _ := many_loop k ff str (mk_input i st bst)
| result.error _ _ st _ _ := result.ok a i₀ st bst₀ fst)
| ⟨result.error _ _ _ _ _, h⟩ := unreachable_error h
-- Auxiliary function used to lift many_aux
@[inline] def many_aux (a : α) (p : parser_m σ δ μ α) : parser_m σ δ μ α :=
λ str inp, many_loop a p str.length tt str inp
@[inline] def many (p : parser_m σ δ μ unit) : parser_m σ δ μ unit :=
many_aux () p
@[inline] def many1 (p : parser_m σ δ μ unit) : parser_m σ δ μ unit :=
p *> many p
end prim
class monad_parser (σ : out_param Type) (δ : out_param Type) (μ : out_param Type) (m : Type → Type) :=
(lift {} {α : Type} : parser_m σ δ μ α → m α)
(map {} {α : Type} : (Π β, parser_m σ δ μ β → parser_m σ δ μ β) → m α → m α)
instance monad_parser_base : monad_parser σ δ μ (parser_m σ δ μ) :=
{ lift := λ α, id,
map := λ α f x, f α x }
instance monad_parser_trans {m n : Type → Type} [has_monad_lift m n] [monad_functor m m n n] [monad_parser σ δ μ m] : monad_parser σ δ μ n :=
{ lift := λ α p, monad_lift (monad_parser.lift p : m α),
map := λ α f x, monad_map (λ β x, (monad_parser.map @f x : m β)) x }
class monad_parser_aux (σ : out_param Type) (δ : out_param Type) (μ : out_param Type) (m : Type → Type) :=
(map {} {α : Type} : (parser_m σ δ μ α → parser_m σ δ μ α) → m α → m α)
instance monad_parser_aux_base : monad_parser_aux σ δ μ (parser_m σ δ μ) :=
{ map := λ α, id }
instance monad_parser_aux_reader {m : Type → Type} {ρ : Type} [monad m] [monad_parser_aux σ δ μ m] : monad_parser_aux σ δ μ (reader_t ρ m) :=
{ map := λ α f x r, (monad_parser_aux.map f : m α → m α) (x r) }
section
variables {m : Type → Type} [monad_parser σ δ μ m]
@[inline] def satisfy (p : char → bool) : m char := monad_parser.lift (prim.satisfy p)
def ch (c : char) : m char := satisfy (= c)
def alpha : m char := satisfy char.is_alpha
def digit : m char := satisfy char.is_digit
def upper : m char := satisfy char.is_upper
def lower : m char := satisfy char.is_lower
def any : m char := satisfy (λ _, true)
@[inline] def take_until (p : char → bool) : m unit := monad_parser.lift (prim.take_until p)
@[inline] def str (s : string) : m unit := monad_parser.lift (prim.str s)
@[inline] def lookahead (p : m α) : m α :=
monad_parser.map (λ β p, prim.lookahead p) p
@[inline] def take_while (p : char → bool) : m unit := take_until (λ c, !p c)
@[inline] def whitespace : m unit := take_while char.is_whitespace
end
section
variables {m : Type → Type} [monad_parser_aux σ δ μ m]
@[inline] def many (p : m unit) : m unit := monad_parser_aux.map prim.many p
@[inline] def many1 (p : m unit) : m unit := monad_parser_aux.map prim.many1 p
end
end parser
end lean
abbrev parser := reader_t nat (lean.parser.parser_m unit unit unit) unit
open lean.parser
-- set_option pp.implicit true
-- set_option trace.compiler.boxed true
def test_p : parser :=
many1 (str "++" <|> str "**" <|> (str "--" *> take_until (= '\n') *> any *> pure ()))
def test_parser (x : parser) (input : string) : string :=
match (x 0).run () () input with
| lean.parser.result.ok _ i _ _ _ := "Ok at " ++ to_string i
| result.error msg i _ _ _ := "Error at " ++ to_string i ++ ": " ++ msg
@[noinline] def test (s : string) : io unit :=
io.println (test_parser test_p s)
def mk_big_string : nat → string → string
| 0 s := s
| (n+1) s := mk_big_string n (s ++ "-- new comment\n")
def prof {α : Type} (msg : string) (p : io α) : io α :=
let msg₁ := "Time for '" ++ msg ++ "':" in
let msg₂ := "Memory usage for '" ++ msg ++ "':" in
allocprof msg₂ (timeit msg₁ p)
def main (xs : list string) : io unit :=
let s₁ := mk_big_string xs.head.to_nat "" in
let s₂ := s₁ ++ "bad" ++ mk_big_string 20 "" in
prof "parser 1" (test s₁) *>
prof "parser 2" (test s₂)