lean4-htt/library/init/lean/parser/basic.lean

154 lines
6.1 KiB
Text
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/-
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Sebastian Ullrich
Parser for the Lean language
-/
prelude
import init.lean.parser.parsec init.lean.parser.syntax init.lean.parser.rec
import init.lean.parser.trie
import init.lean.parser.identifier init.data.rbmap init.lean.message
namespace lean
namespace parser
/- Maximum standard precedence. This is the precedence of function application.
In the standard Lean language, only the token `.` has a left-binding power greater
than `max_prec` (so that field accesses like `g (h x).f` are parsed as `g ((h x).f)`,
not `(g (h x)).f`). -/
def max_prec : nat := 1024
structure token_config :=
(«prefix» : string)
/- Left-binding power used by the term parser. The term parser operates in the context
of a right-binding power between 0 (used by parentheses and on the top-level) and
(usually) `max_prec` (used by function application). After parsing an initial term,
it continues parsing and expanding that term only when the left-binding power of
the next token is greater than the current right-binding power. For example, it
never continues parsing an argument after the initial parse, unless a token with
lbp > max_prec is encountered. Conversely, the term parser will always continue
parsing inside parentheses until it finds a token with lbp 0 (such as `)`). -/
(lbp : nat := 0)
-- reading a token should not need any state
/- An optional parser that is activated after matching `prefix`.
It should return a syntax tree with a "hole" for the
`source_info` surrounding the token, which will be supplied
by the `token` parser. -/
(token_parser : option (parsec' (source_info → syntax)) := none)
structure parser_state :=
(tokens : trie token_config)
(messages : message_log)
structure parser_config :=
(filename : string)
@[derive monad alternative monad_reader monad_state monad_parsec monad_except]
def parser_t (m : Type → Type) [monad m] := reader_t parser_config $ state_t parser_state $ parsec_t syntax m
abbreviation basic_parser_m := parser_t id
abbreviation basic_parser := basic_parser_m syntax
abbreviation monad_basic_read := has_monad_lift_t basic_parser_m
-- an arbitrary `parser` type; parsers are usually some monad stack based on `basic_parser_m` returning `syntax`
variable {ρ : Type}
class has_tokens (r : ρ) := mk {} ::
(add_tokens : trie token_config → trie token_config)
instance has_tokens.inhabited (r : ρ) : inhabited (has_tokens r) :=
⟨⟨id⟩⟩
def donotinline {α : Type} (a : α) (f : αα := id) :=
f (f a)
-- do NOT inline this function
def add_tokens (r : ρ) [has_tokens r] :=
donotinline (has_tokens.add_tokens r)
instance list.nil.tokens : parser.has_tokens ([] : list ρ) :=
⟨id⟩
def concat_tokens (r : ρ) (rs : list ρ) [parser.has_tokens r] [parser.has_tokens rs] :
trie token_config → trie token_config :=
λ t, has_tokens.add_tokens rs (has_tokens.add_tokens r t)
instance list.cons.tokens (r : ρ) (rs : list ρ) [parser.has_tokens r] [parser.has_tokens rs] :
parser.has_tokens (r::rs) :=
⟨λ t, has_tokens.add_tokens rs (has_tokens.add_tokens r t)⟩
--⟨concat_tokens r rs⟩
protected class has_view (r : ρ) (α : out_param Type) :=
(view : syntax → option α)
(review : α → syntax)
instance has_view.default (r : ρ) : inhabited (parser.has_view r syntax) :=
⟨{ view := some, review := id }⟩
class syntax_node_kind.has_view (k : syntax_node_kind) (α : out_param Type) :=
(view : syntax → option α)
(review : α → syntax)
def message_of_parsec_message {μ : Type} (cfg : parser_config) (msg : parsec.message μ) : message :=
-- FIXME: translate position
{filename := cfg.filename, pos := ⟨0, 0⟩, text := to_string msg}
section
local attribute [reducible] parser_t
protected def run {m : Type → Type} [monad m] (cfg : parser_config) (st : parser_state) (s : string) (r : parser_t m syntax) :
m (syntax × message_log) :=
do r ← ((r.run cfg).run st).parse_with_eoi s,
pure $ match r with
| except.ok (stx, st) := (stx.update_leading s, st.messages)
| except.error msg := (msg.custom.update_leading s, message_log.empty.add (message_of_parsec_message cfg msg))
end
open monad_parsec
open parser.has_view
variables {α : Type} {m : Type → Type}
local notation `parser` := m syntax
def log_message {μ : Type} [monad m] [monad_reader parser_config m] [monad_state parser_state m] (msg : parsec.message μ) : m unit :=
do cfg ← read,
modify (λ st, {st with messages := st.messages.add (message_of_parsec_message cfg msg)})
def eoi : syntax_node_kind := ⟨`lean.parser.eoi⟩
protected def parse [monad m] (cfg : parser_config) (s : string) (r : parser_t m syntax) [parser.has_tokens r] :
m (syntax × message_log) :=
-- the only hardcoded tokens, because they are never directly mentioned by a `parser`
let builtin_tokens : list token_config := [⟨"/-", 0, none⟩, ⟨"--", 0, none⟩] in
let trie := has_tokens.add_tokens r (builtin_tokens.foldl (λ t cfg, trie.insert t cfg.prefix cfg) trie.mk) in
parser.run cfg ⟨trie, message_log.empty⟩ s $ do
stx ← catch r $ λ (msg : parsec.message _), do {
parser.log_message msg,
pure msg.custom
},
-- add `eoi` node
catch monad_parsec.eoi parser.log_message,
let stop := s.mk_iterator.to_end,
pure $ syntax.node ⟨none, [
stx,
syntax.node ⟨eoi, [syntax.atom ⟨some ⟨⟨stop, stop⟩, stop.offset, ⟨stop, stop⟩⟩, ""⟩]⟩
]⟩
structure parse.view_ty :=
(root : syntax)
(eoi : syntax)
def parse.view : syntax → option parse.view_ty
| (syntax.node ⟨none, [root, eoi]⟩) := some ⟨root, eoi⟩
| _ := none
/- Monad stacks used in multiple files -/
@[derive monad alternative monad_reader monad_state monad_parsec monad_except monad_rec monad_basic_read]
def command_parser_m := rec_t unit syntax basic_parser_m
abbreviation command_parser := command_parser_m syntax
@[derive monad alternative monad_reader monad_state monad_parsec monad_except monad_rec monad_basic_read]
def term_parser_m := rec_t nat syntax command_parser_m
abbreviation term_parser := term_parser_m syntax
end «parser»
end lean