lean4-htt/library/init/lean/parser/module.lean

142 lines
5.5 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/-
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Sebastian Ullrich
Module-level parsers
-/
prelude
import init.lean.parser.command
import init.control.coroutine
namespace lean
namespace parser
open combinators monad_parsec coroutine
open parser.has_tokens parser.has_view
local postfix `?`:10000 := optional
local postfix *:10000 := combinators.many
local postfix +:10000 := combinators.many1
structure module_parser_config extends command_parser_config :=
(command_parsers : token_map command_parser)
instance module_parser_config_coe : has_coe module_parser_config command_parser_config :=
⟨module_parser_config.to_command_parser_config⟩
section
local attribute [reducible] parser_core_t
/- We do not need `expected/consumed` handling in this top-level parser that
just delegates to other parsers. More importantly, the standard
`parsec_t.bind x f` does not call `f` in a tail position and so destroys the
tail recursion of `commands_aux`. -/
local attribute [instance] parsec_t.monad'
/- NOTE: missing the `reader_t` from `parser_t` because the `coroutine` already provides
`monad_reader module_parser_config`. -/
@[derive monad alternative monad_reader monad_state monad_parsec monad_except]
def module_parser_m := state_t parser_state $ parser_t module_parser_config id
abbreviation module_parser := module_parser_m syntax
end
instance module_parser_m.lift_parser_t (ρ : Type) [has_lift_t module_parser_config ρ] :
has_monad_lift (parser_t ρ id) module_parser_m :=
{ monad_lift := λ α x st cfg, (λ a, (a, st)) <$> x.run ↑cfg }
section
local attribute [reducible] basic_parser_m
instance module_parser_m.basic_parser_m (ρ : Type) [has_lift_t module_parser_config ρ] :
has_monad_lift basic_parser_m module_parser_m :=
infer_instance
end
namespace module
@[derive parser.has_view parser.has_tokens]
def prelude.parser : basic_parser :=
node! «prelude» ["prelude"]
@[derive parser.has_view parser.has_tokens]
def import_path.parser : basic_parser :=
-- use `raw` to ignore registered tokens like ".."
node! import_path [
dirups: (raw_str ".")*,
module: ident.parser]
@[derive parser.has_view parser.has_tokens]
def import.parser : basic_parser :=
node! «import» ["import", imports: import_path.parser+]
@[derive parser.has_view parser.has_tokens]
def header.parser : basic_parser :=
node! «header» [«prelude»: prelude.parser?, imports: import.parser*]
@[pattern] def eoi : syntax_node_kind := ⟨`lean.parser.module.eoi⟩
def eoi.parser : module_parser := do
monad_parsec.eoi,
it ← left_over,
-- add `eoi` node for left-over input
let stop := it.to_end,
pure $ syntax.mk_node eoi [syntax.atom ⟨some ⟨⟨stop, stop⟩, stop.offset, ⟨stop, stop⟩⟩, ""⟩]
/-- Read command, recovering from errors inside commands (attach partial syntax tree)
as well as unknown commands (skip input). -/
private def command_wrec_aux : bool → nat → module_parser_m (bool × syntax)
| recovering 0 := error "unreachable"
| recovering (nat.succ n) := do
-- terminate at EOF
nat.succ _ ← remaining | (prod.mk ff) <$> eoi.parser,
(recovering, c) ← catch (do {
cfg ← read,
c ← monad_lift $ command.parser.run cfg.command_parsers,
pure (ff, some c)
} <|> do {
-- unknown command: try to skip token, or else single character
when (¬ recovering) $ do {
it ← left_over,
log_message {expected := dlist.singleton "command", it := it, custom := some ()}
},
try (monad_lift token *> pure ()) <|> (any *> pure ()),
pure (tt, none)
}) $ λ msg, do {
-- error inside command: log error, return partial syntax tree
log_message msg,
pure (tt, some msg.custom.get)
},
/- NOTE: We need to make very sure that these recursive calls are happening in tail positions.
Otherwise, resuming the coroutine is linear in the number of previous commands. -/
match c with
| some c := pure (recovering, c)
| none := command_wrec_aux recovering n
def parse_command_with_recovery (recovering : bool) :=
do { rem ← remaining, command_wrec_aux recovering rem.succ }
end module
open module
structure module_parser_snapshot :=
-- it there was a parse error in the previous command, we shouldn't complain if parsing immediately after it
-- fails as well
(recovering : bool)
(it : string.iterator)
-- return (partial) syntax tree and single fatal or multiple non-fatal messages
def resume_module_parser {α : Type} (cfg : module_parser_config) (snap : module_parser_snapshot) (mk_res : α → syntax × module_parser_snapshot)
(p : module_parser_m α) : syntax × except message (module_parser_snapshot × message_log) :=
let (r, _) := ((((prod.mk <$> p <*> left_over).run {messages:=message_log.empty}).run cfg).run_from snap.it).run {} in
match r with
| except.ok ((a, it), st) := let (stx, snap) := mk_res a in (stx, except.ok ({snap with it := it}, st.messages))
| except.error msg := (msg.custom.get, except.error $ message_of_parsec_message cfg msg)
def parse_header (cfg : module_parser_config) :=
let snap := {module_parser_snapshot . recovering := ff, it := cfg.input.mk_iterator} in
resume_module_parser cfg snap (λ stx, (stx, snap)) $ do
-- `token` assumes that there is no leading whitespace
monad_lift whitespace,
monad_lift header.parser
def parse_command (cfg) (snap) := resume_module_parser cfg snap (λ p, (prod.snd p, {snap with recovering := prod.fst p}))
(parse_command_with_recovery snap.recovering)
end parser
end lean