refactor(library/init/lean/parser/parsec): monad_parsec: move from monad_lift/monad_map to direct primitives
This breaks the code for variable-length tokens that depended on lifting `parsec` into `read_m`. Either `read_m` could be parameterized by its state, or we just hard-code all variable-length tokens.
This commit is contained in:
parent
d8122a7284
commit
9db0724bf1
2 changed files with 123 additions and 133 deletions
|
|
@ -185,40 +185,84 @@ def not_followed_by (p : parsec α) (msg : string := "input") : parsec unit :=
|
|||
| ok _ _ := error { pos := it.offset, unexpected := msg } ff
|
||||
| ok_eps _ _ _ := error { pos := it.offset, unexpected := msg } ff
|
||||
| error _ _ := mk_eps () it
|
||||
|
||||
def left_over : parsec iterator :=
|
||||
λ it, result.mk_eps it it
|
||||
|
||||
def error (unexpected : string := "") (expected : dlist string := dlist.empty) (pos : option position := none) : parsec α :=
|
||||
λ it, error { unexpected := unexpected, expected := expected, pos := pos.get_or_else it.offset } ff
|
||||
|
||||
@[inline] def eoi_error (pos : position) : result α :=
|
||||
result.error { pos := pos, unexpected := "end of input" } ff
|
||||
|
||||
@[inline] def satisfy (p : char → bool) : parsec char :=
|
||||
λ it,
|
||||
if !it.has_next then eoi_error it.offset
|
||||
else let c := it.curr in
|
||||
if p c then result.ok c it.next
|
||||
else result.error { pos := it.offset, unexpected := repr c } ff
|
||||
|
||||
private def str_aux : nat → iterator → iterator → option iterator
|
||||
| 0 _ it := some it
|
||||
| (n+1) s_it it :=
|
||||
if it.has_next ∧ s_it.curr = it.curr then str_aux n s_it.next it.next
|
||||
else none
|
||||
|
||||
def str (s : string) : parsec string :=
|
||||
if s.is_empty then pure ""
|
||||
else λ it, match str_aux s.length s.mk_iterator it with
|
||||
| some it' := result.ok s it'
|
||||
| none := result.error { pos := it.offset, expected := dlist.singleton (repr s) } ff
|
||||
|
||||
private def take_aux : nat → string → iterator → result string
|
||||
| 0 r it := result.ok r it
|
||||
| (n+1) r it :=
|
||||
if !it.has_next then eoi_error it.offset
|
||||
else take_aux n (r.push (it.curr)) it.next
|
||||
|
||||
def take (n : nat) : parsec string :=
|
||||
if n = 0 then pure ""
|
||||
else take_aux n ""
|
||||
|
||||
@[inline] private def mk_string_result (r : string) (it : iterator) : result string :=
|
||||
if r.is_empty then result.mk_eps r it
|
||||
else result.ok r it
|
||||
|
||||
private def take_while_aux (p : char → bool) : nat → string → parsec string
|
||||
| 0 r it := mk_string_result r it
|
||||
| (n+1) r it :=
|
||||
if !it.has_next then mk_string_result r it
|
||||
else let c := it.curr in
|
||||
if p c then take_while_aux n (r.push c) it.next
|
||||
else mk_string_result r it
|
||||
|
||||
def take_while_cont (p : char → bool) (ini : string) : parsec string :=
|
||||
λ it, take_while_aux p it.remaining ini it
|
||||
|
||||
@[inline] private def mk_consumed_result (consumed : bool) (it : iterator) : result unit :=
|
||||
if consumed then result.ok () it
|
||||
else result.mk_eps () it
|
||||
|
||||
private def take_while_aux' (p : char → bool) : nat → bool → iterator → result unit
|
||||
| 0 consumed it := mk_consumed_result consumed it
|
||||
| (n+1) consumed it :=
|
||||
if !it.has_next then mk_consumed_result consumed it
|
||||
else let c := it.curr in
|
||||
if p c then take_while_aux' n tt it.next
|
||||
else mk_consumed_result consumed it
|
||||
|
||||
def take_while' (p : char → bool) : parsec unit :=
|
||||
λ it, take_while_aux' p it.remaining ff it
|
||||
end parsec
|
||||
|
||||
/- Type class for abstracting from concrete monad stacks containing a `parsec` somewhere. -/
|
||||
|
||||
/-- Type class for abstracting from concrete monad stacks containing a `parsec` somewhere. -/
|
||||
class monad_parsec (m : Type → Type) :=
|
||||
-- analogous to e.g. `monad_state.lift`
|
||||
(lift {} {α : Type} : parsec α → m α)
|
||||
-- Analogous to e.g. `monad_reader_adapter.map` before simplification (see there).
|
||||
-- Its usage seems to be way too common to justify moving it into a separate type class.
|
||||
(map {} {α : Type} : (∀ {α}, parsec α → parsec α) → m α → m α)
|
||||
|
||||
instance {m : Type → Type} [monad m] : monad_parsec parsec :=
|
||||
{ lift := λ α p, p,
|
||||
map := λ α f x, f x }
|
||||
|
||||
instance monad_parsec_trans {m n : Type → Type} [has_monad_lift m n] [monad_functor m m n n] [monad_parsec m] : monad_parsec n :=
|
||||
{ lift := λ α p, monad_lift (monad_parsec.lift p : m α),
|
||||
map := λ α f x, monad_map (λ β x, (monad_parsec.map @f x : m β)) x }
|
||||
|
||||
namespace monad_parsec
|
||||
open parsec
|
||||
variables {m : Type → Type} [monad m] [monad_parsec m] [alternative m] {α β : Type}
|
||||
|
||||
@[inline] def error {α : Type} (unexpected : string := "") (expected : dlist string := dlist.empty) : m α :=
|
||||
lift $ λ it, result.error ⟨it.offset, unexpected, expected⟩ ff
|
||||
|
||||
def left_over : m iterator :=
|
||||
lift $ λ it, result.mk_eps it it
|
||||
|
||||
@[inline] def label (p : m α) (ex : string) : m α :=
|
||||
map (λ _ p, @parsec.label _ p ex) p
|
||||
|
||||
infixr ` <?> `:2 := label
|
||||
|
||||
/--
|
||||
(error {} {α : Type} (unexpected : string := "") (expected : dlist string := dlist.empty)
|
||||
(pos : option parsec.position := none) : m α)
|
||||
(left_over {} : m iterator)
|
||||
(label {α : Type} (p : m α) (expected : string) : m α)
|
||||
/-
|
||||
`try p` behaves like `p`, but it pretends `p` hasn't
|
||||
consumed any input when `p` fails.
|
||||
|
||||
|
|
@ -235,19 +279,50 @@ together.
|
|||
```
|
||||
Without the `try` combinator we will not be able to backtrack on the `let` keyword.
|
||||
-/
|
||||
@[inline] def try (p : m α) : m α :=
|
||||
map (λ _ p, parsec.try p) p
|
||||
|
||||
/-- Parse `p` without consuming any input. -/
|
||||
@[inline] def lookahead (p : m α) : m α :=
|
||||
map (λ _ p, parsec.lookahead p) p
|
||||
|
||||
-- TODO(Sebastian): `monad_functor` is too weak to lift this, probably needs something like `monad_control`
|
||||
(try {α : Type} (p : m α) : m α)
|
||||
/- Parse `p` without consuming any input. -/
|
||||
(lookahead {α : Type} (p : m α) : m α)
|
||||
/-
|
||||
/-- `not_followed_by p` succeeds when parser `p` fails -/
|
||||
@[inline] def not_followed_by (p : m α) (msg : string := "input") : m unit :=
|
||||
map (λ _ _ inst p, @parsec_t.not_followed_by _ inst _ p msg) p
|
||||
If the next character `c` satisfies `p`, then
|
||||
update position and return `c`. Otherwise,
|
||||
generate error message with current position and character. -/
|
||||
(satisfy {} (p : char → bool) : m char)
|
||||
/-
|
||||
`str s` parses a sequence of elements that match `s`. Returns the parsed string (i.e. `s`).
|
||||
This parser consumes no input if it fails (even if a partial match).
|
||||
Note: The behaviour of this parser is different to that the `string` parser in the Parsec Haskell library,
|
||||
as this one is all-or-nothing.
|
||||
-/
|
||||
(str {} (s : string) : m string)
|
||||
/- Consume `n` characters. -/
|
||||
(take {} (n : nat) : m string)
|
||||
(take_while_cont {} (p : char → bool) (ini : string) : m string)
|
||||
/- Similar to `take_while` but it does not return the consumed input. -/
|
||||
(take_while' {} (p : char → bool) : m unit)
|
||||
|
||||
open monad_parsec
|
||||
|
||||
instance {m : Type → Type} [monad m] : monad_parsec parsec :=
|
||||
⟨@parsec.error, @parsec.left_over, @parsec.label, @parsec.try, @parsec.lookahead, @parsec.satisfy,
|
||||
@parsec.str, @parsec.take, @parsec.take_while_cont, @parsec.take_while'⟩
|
||||
|
||||
instance monad_parsec_trans {m n : Type → Type} [has_monad_lift m n] [monad_functor m m n n] [monad_parsec m] [monad n] [has_orelse n] : monad_parsec n :=
|
||||
{ error := λ α un ex pos, monad_lift (error un ex pos : m _),
|
||||
left_over := monad_lift (left_over : m _),
|
||||
label := λ α p ex, monad_map (λ α (p : m α), label p ex) p,
|
||||
try := λ α, monad_map (@try m _),
|
||||
lookahead := λ α, monad_map (@try m _),
|
||||
satisfy := λ p, monad_lift (satisfy p : m _),
|
||||
str := λ s, monad_lift (str s : m _),
|
||||
take := λ n, monad_lift (take n : m _),
|
||||
take_while_cont := λ p ini, monad_lift (take_while_cont p ini : m _),
|
||||
take_while' := λ p, monad_lift (take_while' p : m _) }
|
||||
|
||||
namespace monad_parsec
|
||||
open parsec
|
||||
variables {m : Type → Type} [monad m] [monad_parsec m] [alternative m] {α β : Type}
|
||||
|
||||
infixr ` <?> `:2 := label
|
||||
|
||||
/-- Faster version of `not_followed_by (satisfy p)` -/
|
||||
@[inline] def not_followed_by_sat (p : char → bool) : m unit :=
|
||||
|
|
@ -257,26 +332,12 @@ do it ← left_over,
|
|||
if p c then error (repr c)
|
||||
else pure ()
|
||||
|
||||
@[inline] def eoi_error (pos : position) : result α :=
|
||||
result.error { pos := pos, unexpected := "end of input" } ff
|
||||
|
||||
def curr : m char :=
|
||||
string.iterator.curr <$> left_over
|
||||
|
||||
@[inline] def cond (p : char → bool) (t : m α) (e : m α) : m α :=
|
||||
mcond (p <$> curr) t e
|
||||
|
||||
/--
|
||||
If the next character `c` satisfies `p`, then
|
||||
update position and return `c`. Otherwise,
|
||||
generate error message with current position and character. -/
|
||||
@[inline] def satisfy (p : char → bool) : m char :=
|
||||
lift $ λ it,
|
||||
if !it.has_next then eoi_error it.offset
|
||||
else let c := it.curr in
|
||||
if p c then result.ok c it.next
|
||||
else result.error { pos := it.offset, unexpected := repr c } ff
|
||||
|
||||
def ch (c : char) : m char :=
|
||||
satisfy (= c)
|
||||
|
||||
|
|
@ -295,68 +356,19 @@ satisfy char.is_lower
|
|||
def any : m char :=
|
||||
satisfy (λ _, true)
|
||||
|
||||
private def str_aux : nat → iterator → iterator → option iterator
|
||||
| 0 _ it := some it
|
||||
| (n+1) s_it it :=
|
||||
if it.has_next ∧ s_it.curr = it.curr then str_aux n s_it.next it.next
|
||||
else none
|
||||
|
||||
/--
|
||||
`str s` parses a sequence of elements that match `s`. Returns the parsed string (i.e. `s`).
|
||||
This parser consumes no input if it fails (even if a partial match).
|
||||
Note: The behaviour of this parser is different to that the `string` parser in the Parsec Haskell library,
|
||||
as this one is all-or-nothing.
|
||||
-/
|
||||
def str (s : string) : m string :=
|
||||
if s.is_empty then pure ""
|
||||
else lift $ λ it, match str_aux s.length s.mk_iterator it with
|
||||
| some it' := result.ok s it'
|
||||
| none := result.error { pos := it.offset, expected := dlist.singleton (repr s) } ff
|
||||
|
||||
private def take_aux : nat → string → iterator → result string
|
||||
| 0 r it := result.ok r it
|
||||
| (n+1) r it :=
|
||||
if !it.has_next then eoi_error it.offset
|
||||
else take_aux n (r.push (it.curr)) it.next
|
||||
|
||||
/-- Consume `n` characters. -/
|
||||
def take (n : nat) : m string :=
|
||||
if n = 0 then pure ""
|
||||
else lift $ take_aux n ""
|
||||
|
||||
@[inline] private def mk_string_result (r : string) (it : iterator) : result string :=
|
||||
if r.is_empty then result.mk_eps r it
|
||||
else result.ok r it
|
||||
|
||||
private def take_while_aux (p : char → bool) : nat → string → iterator → result string
|
||||
| 0 r it := mk_string_result r it
|
||||
| (n+1) r it :=
|
||||
if !it.has_next then mk_string_result r it
|
||||
else let c := it.curr in
|
||||
if p c then take_while_aux n (r.push c) it.next
|
||||
else mk_string_result r it
|
||||
|
||||
/--
|
||||
Consume input as long as the predicate returns `tt`, and return the consumed input.
|
||||
This parser does not fail. It will return an empty string if the predicate
|
||||
returns `ff` on the current character. -/
|
||||
def take_while (p : char → bool) : m string :=
|
||||
lift $ λ it, take_while_aux p it.remaining "" it
|
||||
|
||||
def take_while_cont (p : char → bool) (ini : string) : m string :=
|
||||
lift $ λ it, take_while_aux p it.remaining ini it
|
||||
take_while_cont p ""
|
||||
|
||||
/--
|
||||
Consume input as long as the predicate returns `tt`, and return the consumed input.
|
||||
This parser requires the predicate to succeed on at least once. -/
|
||||
def take_while1 (p : char → bool) : m string :=
|
||||
lift $ λ it,
|
||||
if !it.has_next then eoi_error it.offset
|
||||
else let c := it.curr in
|
||||
if p c
|
||||
then let input := it.next in
|
||||
take_while_aux p input.remaining (to_string c) input
|
||||
else result.error { pos := it.offset, unexpected := repr c } ff
|
||||
do c ← satisfy p,
|
||||
take_while_cont p (to_string c)
|
||||
|
||||
/--
|
||||
Consume input as long as the predicate returns `ff` (i.e. until it returns `tt`), and return the consumed input.
|
||||
|
|
@ -367,31 +379,9 @@ take_while (λ c, !p c)
|
|||
def take_until1 (p : char → bool) : m string :=
|
||||
take_while1 (λ c, !p c)
|
||||
|
||||
@[inline] private def mk_consumed_result (consumed : bool) (it : iterator) : result unit :=
|
||||
if consumed then result.ok () it
|
||||
else result.mk_eps () it
|
||||
|
||||
private def take_while_aux' (p : char → bool) : nat → bool → iterator → result unit
|
||||
| 0 consumed it := mk_consumed_result consumed it
|
||||
| (n+1) consumed it :=
|
||||
if !it.has_next then mk_consumed_result consumed it
|
||||
else let c := it.curr in
|
||||
if p c then take_while_aux' n tt it.next
|
||||
else mk_consumed_result consumed it
|
||||
|
||||
/-- Similar to `take_while` but it does not return the consumed input. -/
|
||||
def take_while' (p : char → bool) : m unit :=
|
||||
lift $ λ it, take_while_aux' p it.remaining ff it
|
||||
|
||||
/-- Similar to `take_while1` but it does not return the consumed input. -/
|
||||
def take_while1' (p : char → bool) : m unit :=
|
||||
lift $ λ it,
|
||||
if !it.has_next then eoi_error it.offset
|
||||
else let c := it.curr in
|
||||
if p c
|
||||
then let input := it.next in
|
||||
take_while_aux' p input.remaining tt input
|
||||
else result.error { pos := it.offset, unexpected := repr c } ff
|
||||
satisfy p *> take_while' p
|
||||
|
||||
/-- Consume zero or more whitespaces. -/
|
||||
def whitespace : m unit :=
|
||||
|
|
@ -481,7 +471,7 @@ def unexpected (msg : string) : m α :=
|
|||
error msg
|
||||
|
||||
def unexpected_at (msg : string) (pos : position) : m α :=
|
||||
lift $ λ _, result.error {unexpected := msg, pos := pos} ff
|
||||
error msg dlist.empty pos
|
||||
|
||||
end monad_parsec
|
||||
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ do (r, i) ← with_source_info $ do {
|
|||
do str tk,
|
||||
pure $ λ i, syntax.atom ⟨some i, atomic_val.string tk⟩
|
||||
-- variable-length token
|
||||
| some ⟨tk, some r⟩ := str tk *> monad_parsec.lift r
|
||||
| some ⟨tk, some r⟩ := error "not implemented" --str tk *> monad_parsec.lift r
|
||||
| none := number' <|> ident'
|
||||
},
|
||||
pure (r i)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue