diff --git a/library/init/lean/parser/parsec.lean b/library/init/lean/parser/parsec.lean index 68ea6bb9a8..9c1e9da79e 100644 --- a/library/init/lean/parser/parsec.lean +++ b/library/init/lean/parser/parsec.lean @@ -185,40 +185,84 @@ def not_followed_by (p : parsec α) (msg : string := "input") : parsec unit := | ok _ _ := error { pos := it.offset, unexpected := msg } ff | ok_eps _ _ _ := error { pos := it.offset, unexpected := msg } ff | error _ _ := mk_eps () it + +def left_over : parsec iterator := +λ it, result.mk_eps it it + +def error (unexpected : string := "") (expected : dlist string := dlist.empty) (pos : option position := none) : parsec α := +λ it, error { unexpected := unexpected, expected := expected, pos := pos.get_or_else it.offset } ff + +@[inline] def eoi_error (pos : position) : result α := +result.error { pos := pos, unexpected := "end of input" } ff + +@[inline] def satisfy (p : char → bool) : parsec char := +λ it, + if !it.has_next then eoi_error it.offset + else let c := it.curr in + if p c then result.ok c it.next + else result.error { pos := it.offset, unexpected := repr c } ff + +private def str_aux : nat → iterator → iterator → option iterator +| 0 _ it := some it +| (n+1) s_it it := + if it.has_next ∧ s_it.curr = it.curr then str_aux n s_it.next it.next + else none + +def str (s : string) : parsec string := +if s.is_empty then pure "" +else λ it, match str_aux s.length s.mk_iterator it with + | some it' := result.ok s it' + | none := result.error { pos := it.offset, expected := dlist.singleton (repr s) } ff + +private def take_aux : nat → string → iterator → result string +| 0 r it := result.ok r it +| (n+1) r it := + if !it.has_next then eoi_error it.offset + else take_aux n (r.push (it.curr)) it.next + +def take (n : nat) : parsec string := +if n = 0 then pure "" +else take_aux n "" + +@[inline] private def mk_string_result (r : string) (it : iterator) : result string := +if r.is_empty then result.mk_eps r it +else result.ok r it + +private def take_while_aux (p : char → bool) : nat → string → parsec string +| 0 r it := mk_string_result r it +| (n+1) r it := + if !it.has_next then mk_string_result r it + else let c := it.curr in + if p c then take_while_aux n (r.push c) it.next + else mk_string_result r it + +def take_while_cont (p : char → bool) (ini : string) : parsec string := +λ it, take_while_aux p it.remaining ini it + +@[inline] private def mk_consumed_result (consumed : bool) (it : iterator) : result unit := +if consumed then result.ok () it +else result.mk_eps () it + +private def take_while_aux' (p : char → bool) : nat → bool → iterator → result unit +| 0 consumed it := mk_consumed_result consumed it +| (n+1) consumed it := + if !it.has_next then mk_consumed_result consumed it + else let c := it.curr in + if p c then take_while_aux' n tt it.next + else mk_consumed_result consumed it + +def take_while' (p : char → bool) : parsec unit := +λ it, take_while_aux' p it.remaining ff it end parsec -/- Type class for abstracting from concrete monad stacks containing a `parsec` somewhere. -/ + +/-- Type class for abstracting from concrete monad stacks containing a `parsec` somewhere. -/ class monad_parsec (m : Type → Type) := --- analogous to e.g. `monad_state.lift` -(lift {} {α : Type} : parsec α → m α) --- Analogous to e.g. `monad_reader_adapter.map` before simplification (see there). --- Its usage seems to be way too common to justify moving it into a separate type class. -(map {} {α : Type} : (∀ {α}, parsec α → parsec α) → m α → m α) - -instance {m : Type → Type} [monad m] : monad_parsec parsec := -{ lift := λ α p, p, - map := λ α f x, f x } - -instance monad_parsec_trans {m n : Type → Type} [has_monad_lift m n] [monad_functor m m n n] [monad_parsec m] : monad_parsec n := -{ lift := λ α p, monad_lift (monad_parsec.lift p : m α), - map := λ α f x, monad_map (λ β x, (monad_parsec.map @f x : m β)) x } - -namespace monad_parsec -open parsec -variables {m : Type → Type} [monad m] [monad_parsec m] [alternative m] {α β : Type} - -@[inline] def error {α : Type} (unexpected : string := "") (expected : dlist string := dlist.empty) : m α := -lift $ λ it, result.error ⟨it.offset, unexpected, expected⟩ ff - -def left_over : m iterator := -lift $ λ it, result.mk_eps it it - -@[inline] def label (p : m α) (ex : string) : m α := -map (λ _ p, @parsec.label _ p ex) p - -infixr ` `:2 := label - -/-- +(error {} {α : Type} (unexpected : string := "") (expected : dlist string := dlist.empty) + (pos : option parsec.position := none) : m α) +(left_over {} : m iterator) +(label {α : Type} (p : m α) (expected : string) : m α) +/- `try p` behaves like `p`, but it pretends `p` hasn't consumed any input when `p` fails. @@ -235,19 +279,50 @@ together. ``` Without the `try` combinator we will not be able to backtrack on the `let` keyword. -/ -@[inline] def try (p : m α) : m α := -map (λ _ p, parsec.try p) p - -/-- Parse `p` without consuming any input. -/ -@[inline] def lookahead (p : m α) : m α := -map (λ _ p, parsec.lookahead p) p - --- TODO(Sebastian): `monad_functor` is too weak to lift this, probably needs something like `monad_control` +(try {α : Type} (p : m α) : m α) +/- Parse `p` without consuming any input. -/ +(lookahead {α : Type} (p : m α) : m α) /- -/-- `not_followed_by p` succeeds when parser `p` fails -/ -@[inline] def not_followed_by (p : m α) (msg : string := "input") : m unit := -map (λ _ _ inst p, @parsec_t.not_followed_by _ inst _ p msg) p +If the next character `c` satisfies `p`, then +update position and return `c`. Otherwise, +generate error message with current position and character. -/ +(satisfy {} (p : char → bool) : m char) +/- +`str s` parses a sequence of elements that match `s`. Returns the parsed string (i.e. `s`). +This parser consumes no input if it fails (even if a partial match). +Note: The behaviour of this parser is different to that the `string` parser in the Parsec Haskell library, +as this one is all-or-nothing. -/ +(str {} (s : string) : m string) +/- Consume `n` characters. -/ +(take {} (n : nat) : m string) +(take_while_cont {} (p : char → bool) (ini : string) : m string) +/- Similar to `take_while` but it does not return the consumed input. -/ +(take_while' {} (p : char → bool) : m unit) + +open monad_parsec + +instance {m : Type → Type} [monad m] : monad_parsec parsec := +⟨@parsec.error, @parsec.left_over, @parsec.label, @parsec.try, @parsec.lookahead, @parsec.satisfy, + @parsec.str, @parsec.take, @parsec.take_while_cont, @parsec.take_while'⟩ + +instance monad_parsec_trans {m n : Type → Type} [has_monad_lift m n] [monad_functor m m n n] [monad_parsec m] [monad n] [has_orelse n] : monad_parsec n := +{ error := λ α un ex pos, monad_lift (error un ex pos : m _), + left_over := monad_lift (left_over : m _), + label := λ α p ex, monad_map (λ α (p : m α), label p ex) p, + try := λ α, monad_map (@try m _), + lookahead := λ α, monad_map (@try m _), + satisfy := λ p, monad_lift (satisfy p : m _), + str := λ s, monad_lift (str s : m _), + take := λ n, monad_lift (take n : m _), + take_while_cont := λ p ini, monad_lift (take_while_cont p ini : m _), + take_while' := λ p, monad_lift (take_while' p : m _) } + +namespace monad_parsec +open parsec +variables {m : Type → Type} [monad m] [monad_parsec m] [alternative m] {α β : Type} + +infixr ` `:2 := label /-- Faster version of `not_followed_by (satisfy p)` -/ @[inline] def not_followed_by_sat (p : char → bool) : m unit := @@ -257,26 +332,12 @@ do it ← left_over, if p c then error (repr c) else pure () -@[inline] def eoi_error (pos : position) : result α := -result.error { pos := pos, unexpected := "end of input" } ff - def curr : m char := string.iterator.curr <$> left_over @[inline] def cond (p : char → bool) (t : m α) (e : m α) : m α := mcond (p <$> curr) t e -/-- -If the next character `c` satisfies `p`, then -update position and return `c`. Otherwise, -generate error message with current position and character. -/ -@[inline] def satisfy (p : char → bool) : m char := -lift $ λ it, - if !it.has_next then eoi_error it.offset - else let c := it.curr in - if p c then result.ok c it.next - else result.error { pos := it.offset, unexpected := repr c } ff - def ch (c : char) : m char := satisfy (= c) @@ -295,68 +356,19 @@ satisfy char.is_lower def any : m char := satisfy (λ _, true) -private def str_aux : nat → iterator → iterator → option iterator -| 0 _ it := some it -| (n+1) s_it it := - if it.has_next ∧ s_it.curr = it.curr then str_aux n s_it.next it.next - else none - -/-- -`str s` parses a sequence of elements that match `s`. Returns the parsed string (i.e. `s`). -This parser consumes no input if it fails (even if a partial match). -Note: The behaviour of this parser is different to that the `string` parser in the Parsec Haskell library, -as this one is all-or-nothing. --/ -def str (s : string) : m string := -if s.is_empty then pure "" -else lift $ λ it, match str_aux s.length s.mk_iterator it with - | some it' := result.ok s it' - | none := result.error { pos := it.offset, expected := dlist.singleton (repr s) } ff - -private def take_aux : nat → string → iterator → result string -| 0 r it := result.ok r it -| (n+1) r it := - if !it.has_next then eoi_error it.offset - else take_aux n (r.push (it.curr)) it.next - -/-- Consume `n` characters. -/ -def take (n : nat) : m string := -if n = 0 then pure "" -else lift $ take_aux n "" - -@[inline] private def mk_string_result (r : string) (it : iterator) : result string := -if r.is_empty then result.mk_eps r it -else result.ok r it - -private def take_while_aux (p : char → bool) : nat → string → iterator → result string -| 0 r it := mk_string_result r it -| (n+1) r it := - if !it.has_next then mk_string_result r it - else let c := it.curr in - if p c then take_while_aux n (r.push c) it.next - else mk_string_result r it - /-- Consume input as long as the predicate returns `tt`, and return the consumed input. This parser does not fail. It will return an empty string if the predicate returns `ff` on the current character. -/ def take_while (p : char → bool) : m string := -lift $ λ it, take_while_aux p it.remaining "" it - -def take_while_cont (p : char → bool) (ini : string) : m string := -lift $ λ it, take_while_aux p it.remaining ini it +take_while_cont p "" /-- Consume input as long as the predicate returns `tt`, and return the consumed input. This parser requires the predicate to succeed on at least once. -/ def take_while1 (p : char → bool) : m string := -lift $ λ it, - if !it.has_next then eoi_error it.offset - else let c := it.curr in - if p c - then let input := it.next in - take_while_aux p input.remaining (to_string c) input - else result.error { pos := it.offset, unexpected := repr c } ff +do c ← satisfy p, + take_while_cont p (to_string c) /-- Consume input as long as the predicate returns `ff` (i.e. until it returns `tt`), and return the consumed input. @@ -367,31 +379,9 @@ take_while (λ c, !p c) def take_until1 (p : char → bool) : m string := take_while1 (λ c, !p c) -@[inline] private def mk_consumed_result (consumed : bool) (it : iterator) : result unit := -if consumed then result.ok () it -else result.mk_eps () it - -private def take_while_aux' (p : char → bool) : nat → bool → iterator → result unit -| 0 consumed it := mk_consumed_result consumed it -| (n+1) consumed it := - if !it.has_next then mk_consumed_result consumed it - else let c := it.curr in - if p c then take_while_aux' n tt it.next - else mk_consumed_result consumed it - -/-- Similar to `take_while` but it does not return the consumed input. -/ -def take_while' (p : char → bool) : m unit := -lift $ λ it, take_while_aux' p it.remaining ff it - /-- Similar to `take_while1` but it does not return the consumed input. -/ def take_while1' (p : char → bool) : m unit := -lift $ λ it, - if !it.has_next then eoi_error it.offset - else let c := it.curr in - if p c - then let input := it.next in - take_while_aux' p input.remaining tt input - else result.error { pos := it.offset, unexpected := repr c } ff +satisfy p *> take_while' p /-- Consume zero or more whitespaces. -/ def whitespace : m unit := @@ -481,7 +471,7 @@ def unexpected (msg : string) : m α := error msg def unexpected_at (msg : string) (pos : position) : m α := -lift $ λ _, result.error {unexpected := msg, pos := pos} ff +error msg dlist.empty pos end monad_parsec diff --git a/library/init/lean/parser/reader/token.lean b/library/init/lean/parser/reader/token.lean index 1e7064cd71..5d7bb0c3fe 100644 --- a/library/init/lean/parser/reader/token.lean +++ b/library/init/lean/parser/reader/token.lean @@ -95,7 +95,7 @@ do (r, i) ← with_source_info $ do { do str tk, pure $ λ i, syntax.atom ⟨some i, atomic_val.string tk⟩ -- variable-length token - | some ⟨tk, some r⟩ := str tk *> monad_parsec.lift r + | some ⟨tk, some r⟩ := error "not implemented" --str tk *> monad_parsec.lift r | none := number' <|> ident' }, pure (r i)