lean4-htt/library/init/lean/parser/reader/basic.lean

303 lines
11 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/-
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Sebastian Ullrich
Reader for the Lean language
-/
prelude
import init.lean.parser.parsec init.lean.parser.syntax init.lean.parser.macro
import init.lean.parser.identifier
/-- A small wrapper of `reader_t` that simplifies introducing and invoking
recursion points in a computation. -/
-- TODO(Sebastian): move?
def rec_t (r : Type) (m : Type → Type) (α : Type) :=
reader_t (m r) m α
namespace rec_t
variables {m : Type → Type} {r α : Type} [monad m]
local attribute [reducible] rec_t
/-- Continue at the recursion point stored at `with_recurse`. -/
def recurse : rec_t r m r :=
do x ← read,
monad_lift x
variables (base : m r) (rec : rec_t r m r)
private def with_recurse_aux : nat → m r
| 0 := base
| (n+1) := rec.run (with_recurse_aux n)
/-- Execute `rec`, re-executing it whenever `recurse` is called.
After `max_rec` recursion steps, `base` is executed instead. -/
def with_recurse (max_rec := 1000) : rec_t r m r :=
⟨λ _, rec.run (with_recurse_aux base rec max_rec)⟩
-- not clear how to auto-derive these given the additional constraints
instance : monad (rec_t r m) := infer_instance
instance [alternative m] : alternative (rec_t r m) := infer_instance
instance : has_monad_lift m (rec_t r m) := infer_instance
instance (ε) [monad_except ε m] : monad_except ε (rec_t r m) := infer_instance
instance (μ) [alternative m] [lean.parser.monad_parsec μ m] : lean.parser.monad_parsec μ (rec_t r m) :=
infer_instance
end rec_t
namespace lean
-- TODO: enhance massively
abbreviation message := string
namespace parser
structure token_config :=
(«prefix» : string)
-- reading a token should not need any state
/- An optional parser that is activated after matching `prefix`.
It should return a syntax tree with a "hole" for the
`source_info` surrounding the token, which will be supplied
by the `token` reader. -/
(token_reader : option (parsec' (source_info → syntax)) := none)
structure reader_state :=
(tokens : list token_config)
-- note: stored in reverse for efficient append
(errors : list lean.message)
/- Start position of the current token. This might not be equal to the parser
position for two reasons:
* We plan to eagerly parse leading whitespace so as not to do so multiple times
* During error recovery, skipped input should be associated to the next token -/
(token_start : string.iterator)
structure reader_config := mk
@[irreducible, derive monad alternative monad_reader monad_state monad_parsec monad_except]
def read_m := rec_t syntax $ reader_t reader_config $ state_t reader_state $ parsec syntax
structure reader :=
(read : read_m syntax)
(tokens : list token_config := [])
class reader.has_view (r : reader) (α : out_param Type) :=
(view : syntax → option α)
(review : α → syntax)
instance reader.has_view.default (r : reader) : inhabited (reader.has_view r syntax) :=
⟨{ view := some, review := id }⟩
class macro.has_view (m : macro) (α : out_param Type) :=
(view : syntax → option α)
(review : α → syntax)
namespace read_m
local attribute [reducible] read_m
protected def run (cfg : reader_config) (st : reader_state) (s : string) (r : read_m syntax) :
syntax × list message :=
match (((r.run (monad_parsec.error "no recursive parser at top level")).run cfg).run st).parse_with_eoi s with
| except.ok (a, st) := (a, st.errors.reverse)
| except.error msg := (msg.custom, [to_string msg])
def log_error (e : message) : read_m unit :=
modify (λ st, {st with errors := to_string e :: st.errors})
end read_m
namespace reader
open monad_parsec
open reader.has_view
variable {α : Type}
protected def parse (cfg : reader_config) (s : string) (r : reader) :
syntax × list message :=
-- the only hardcoded tokens, because they are never directly mentioned by a `reader`
let tokens : list token_config := [⟨"/-", none⟩, ⟨"--", none⟩] in
do {
stx ← catch r.read $ λ (msg : parsec.message _), do {
modify $ λ st, {st with token_start := msg.it},
read_m.log_error (to_string msg),
pure msg.custom
},
whitespace,
-- add `eoi` node and store any residual input in its prefix
catch eoi $ λ msg, read_m.log_error (to_string msg),
tk_start ← reader_state.token_start <$> get,
let stop := tk_start.to_end in
pure $ syntax.node ⟨name.anonymous, [
stx,
syntax.node ⟨`eoi, [syntax.atom ⟨some ⟨⟨tk_start, stop⟩, stop.offset, ⟨stop, stop⟩⟩, atomic_val.string ""⟩]⟩
]⟩
}.run cfg ⟨r.tokens ++ tokens, [], s.mk_iterator⟩ s
structure parse.view_ty :=
(root : syntax)
(eoi : syntax)
def parse.view : syntax → option parse.view_ty
| (syntax.node ⟨name.anonymous, [root, eoi]⟩) := some ⟨root, eoi⟩
| _ := none
namespace combinators
def node' (m : name) (rs : list reader) : reader :=
{ read := do {
(args, _) ← rs.mfoldl (λ (p : list syntax × nat) r, do
(args, remaining) ← pure p,
-- on error, append partial syntax tree and `missing` objects to previous successful parses and rethrow
a ← catch r.read $ λ msg,
let args := list.repeat syntax.missing (remaining-1) ++ msg.custom :: args in
throw {msg with custom := syntax.node ⟨m, args.reverse⟩},
pure (a::args, remaining - 1)
) ([], rs.length),
pure $ syntax.node ⟨m, args.reverse⟩
},
tokens := rs.bind reader.tokens }
@[reducible] def seq := node' name.anonymous
@[reducible] def node (m : macro) := node' m.name
instance node.view (m rs) [i : macro.has_view m α] : reader.has_view (node m rs) α :=
{ view := i.view, review := i.review }
/-
instance node'.view_cons (β m r rs) [reader.has_view r α] [reader.has_view (node' m rs) β] : reader.has_view (node' m (r::rs)) (α × β) :=
{ view := λ stx, do {
syntax.node ⟨m', (stx::stxs)⟩ ← pure stx | failure,
guard (m' = m),
a ← view r stx,
b ← view (node' m rs) $ syntax.node ⟨m, stxs⟩,
pure (a, b)
},
review := λ ⟨a, b⟩, match review (node' m rs) b with
| syntax.node ⟨_, stxs⟩ := syntax.node ⟨m, review r a::stxs⟩
| _ := syntax.missing /- unreachable -/ }
instance node'.view_nil (m r) [reader.has_view r α] : reader.has_view (node' m [r]) α :=
{ view := λ stx, do {
syntax.node ⟨m', [stx]⟩ ← pure stx | failure,
guard (m' = m),
a ← view r stx,
pure a
},
review := λ a, syntax.node ⟨m, [review r a]⟩ }
-/
private def many1_aux (p : read_m syntax) : list syntax → nat → read_m syntax
| as 0 := error "unreachable"
| as (n+1) := do a ← catch p (λ msg, throw {msg with custom :=
-- append `syntax.missing` to make clear that list is incomplete
syntax.node ⟨name.anonymous, (syntax.missing::msg.custom::as).reverse⟩}),
many1_aux (a::as) n <|> pure (syntax.node ⟨name.anonymous, (a::as).reverse⟩)
def many1 (r : reader) : reader :=
{ r with read := do rem ← remaining, many1_aux r.read [] (rem+1) }
instance many1.view (r) [reader.has_view r α] : reader.has_view (many1 r) (list α) :=
{ view := λ stx, match stx with
| syntax.missing := list.ret <$> view r syntax.missing
| syntax.node ⟨name.anonymous, stxs⟩ := stxs.mmap (view r)
| _ := failure,
review := λ as, syntax.node ⟨name.anonymous, as.map (review r)⟩ }
/-
instance many1.view (r) : reader.has_view (many1 r) (list syntax) :=
{ view := λ stx, match stx with
| syntax.missing := [syntax.missing]
| syntax.node ⟨name.anonymous, stxs⟩ := stxs
| _ := failure,
review := λ stxs, syntax.node ⟨name.anonymous, stxs⟩ }
-/
def many (r : reader) : reader :=
{ r with read := (many1 r).read <|> pure (syntax.node ⟨name.anonymous, []⟩) }
/-
instance many.view (r) : reader.has_view (many r) (list syntax) :=
{..many1.view r}
-/
instance many.view (r) [has_view r α] : reader.has_view (many r) (list α) :=
{..many1.view r}
def optional (r : reader) : reader :=
{ r with read := do
r ← optional $
-- on error, wrap in "some"
catch r.read (λ msg, throw {msg with custom := syntax.node ⟨name.anonymous, [msg.custom]⟩}),
pure $ match r with
| some r := syntax.node ⟨name.anonymous, [r]⟩
| none := syntax.node ⟨name.anonymous, []⟩ }
inductive optional_view (α : Type)
| some (a : α) : optional_view
| none {} : optional_view
| missing {} : optional_view
namespace optional_view
instance : functor optional_view :=
{ map := λ _ _ f v, match v with
| some a := some (f a)
| none := none
| missing := missing }
end optional_view
instance optional.view (r) [reader.has_view r α] : reader.has_view (optional r) (optional_view α) :=
{ view := λ stx, match stx with
| syntax.missing := pure optional_view.missing
| syntax.node ⟨name.anonymous, []⟩ := pure optional_view.none
| syntax.node ⟨name.anonymous, [stx]⟩ := optional_view.some <$> view r stx
| _ := failure,
review := λ a, match a with
| optional_view.some a := syntax.node ⟨name.anonymous, [review r a]⟩
| optional_view.none := syntax.node ⟨name.anonymous, []⟩
| optional_view.missing := syntax.missing }
/-- Parse a list `[p1, ..., pn]` of readers as `p1 <|> ... <|> pn`.
Note that there is NO explicit encoding of which reader was chosen;
readers should instead produce distinct node names for disambiguation. -/
def any_of (rs : list reader) : reader :=
{ read := (match rs with
| [] := error "any_of"
| (r::rs) := (rs.map reader.read).foldl (<|>) r.read),
tokens := (rs.map reader.tokens).join }
instance any_of.view (rs) : reader.has_view (any_of rs) syntax := default _
/-- Parse a list `[p1, ..., pn]` of readers as `p1 <|> ... <|> pn`.
The result will be wrapped in a node with the the index of the successful
parser as the name. -/
def choice (rs : list reader) : reader :=
{ read :=
(rs.map reader.read).enum.foldr
(λ ⟨i, r⟩ r', (λ stx, syntax.node ⟨name.mk_numeral name.anonymous i, [stx]⟩) <$> r <|> r')
-- use `foldr` so that any other error is preferred over this one
(error "choice: empty list"),
tokens := (rs.map reader.tokens).join }
def try (r : reader) : reader :=
{ r with read := try r.read }
instance try.view (r) [i : reader.has_view r α] : reader.has_view (try r) α :=
{..i}
def label (r : reader) (l : string) : reader :=
{ r with read := label r.read l }
instance label.view (r l) [i : reader.has_view r α] : reader.has_view (label r l) α :=
{..i}
infixr <?> := label
def dbg (label : string) (r : reader) : reader :=
{ r with read := dbg label r.read }
instance dbg.view (r l) [i : reader.has_view r α] : reader.has_view (dbg l r) α :=
{..i}
local attribute [reducible] read_m
def recurse : reader :=
{ read := rec_t.recurse,
tokens := [] } -- recursive use should not contribute any new tokens
def with_recurse (r : reader) : reader :=
{ r with read := rec_t.with_recurse (error "recursion limit") r.read }
end combinators
end reader
end parser
end lean