chore(library/init/lean/parser): add a few comments
This commit is contained in:
parent
aff6d58659
commit
15a25d5aa9
6 changed files with 97 additions and 56 deletions
|
|
@ -35,7 +35,12 @@ structure token_config :=
|
|||
/- An optional parser that is activated after matching `prefix`.
|
||||
It should return a syntax tree with a "hole" for the
|
||||
`source_info` surrounding the token, which will be supplied
|
||||
by the `token` parser. -/
|
||||
by the `token` parser.
|
||||
|
||||
Remark: `suffix_parser` has many applications for example for parsing
|
||||
hexdecimal numbers, `prefix` is `0x` and `suffix_parser` is the parser `digit*`.
|
||||
We also use it to parse string literals: here `prefix` is just `"`.
|
||||
-/
|
||||
(suffix_parser : option (parsec' (source_info → syntax)) := none)
|
||||
|
||||
-- Backtrackable state
|
||||
|
|
@ -50,6 +55,7 @@ structure token_cache_entry :=
|
|||
structure parser_cache :=
|
||||
(token_cache : option token_cache_entry := none)
|
||||
|
||||
/- Remark: if we have a node in the trie with `some token_config`, the string induced by the path is equal to the `token_config.prefix`. -/
|
||||
structure parser_config :=
|
||||
(tokens : trie token_config)
|
||||
(filename : string)
|
||||
|
|
@ -80,12 +86,8 @@ variable {ρ : Type}
|
|||
class has_tokens (r : ρ) := mk {} ::
|
||||
(tokens : list token_config)
|
||||
|
||||
def donotinline {α : Type} (a : α) (f : α → α := id) :=
|
||||
f (f a)
|
||||
|
||||
-- do NOT inline this function
|
||||
def tokens (r : ρ) [has_tokens r] :=
|
||||
donotinline (has_tokens.tokens r)
|
||||
@[noinline] def tokens (r : ρ) [has_tokens r] :=
|
||||
has_tokens.tokens r
|
||||
|
||||
instance has_tokens.inhabited (r : ρ) : inhabited (has_tokens r) :=
|
||||
⟨⟨[]⟩⟩
|
||||
|
|
@ -176,6 +178,7 @@ instance command_parser_m.basic_parser (ρ : Type) [has_lift_t ρ parser_config]
|
|||
⟨λ _ x cfg rec, x.run ↑cfg⟩
|
||||
end
|
||||
|
||||
/- The `nat` at `rec_t` is the lbp` -/
|
||||
@[derive monad alternative monad_reader monad_state monad_parsec monad_except monad_rec monad_basic_parser]
|
||||
def term_parser_m := rec_t nat syntax $ command_parser_m parser_config
|
||||
abbreviation term_parser := term_parser_m syntax
|
||||
|
|
@ -189,6 +192,7 @@ instance trailing_term_parser_coe : has_coe term_parser trailing_term_parser :=
|
|||
⟨λ x _, x⟩
|
||||
|
||||
-- This needs to be a separate structure since `term_parser`s cannot contain themselves in their config
|
||||
-- TODO: we need indexing here.
|
||||
structure command_parser_config extends parser_config :=
|
||||
(leading_term_parsers : list term_parser)
|
||||
(trailing_term_parsers : list trailing_term_parser)
|
||||
|
|
|
|||
|
|
@ -20,18 +20,18 @@ local notation `parser` := m syntax
|
|||
variables [monad m] [monad_except (parsec.message syntax) m] [monad_parsec syntax m] [alternative m]
|
||||
|
||||
def node' (k : option syntax_node_kind) (rs : list parser) : parser :=
|
||||
do (args, _) ← rs.mfoldl (λ (p : list syntax × nat) r, do
|
||||
(args, remaining) ← pure p,
|
||||
do args ← rs.mfoldl (λ (p : list syntax) r, do
|
||||
args ← pure p,
|
||||
-- on error, append partial syntax tree to previous successful parses and rethrow
|
||||
a ← catch r $ λ msg,
|
||||
let args := msg.custom :: args in
|
||||
throw {msg with custom := syntax.node ⟨k, args.reverse⟩},
|
||||
pure (a::args, remaining - 1)
|
||||
) ([], rs.length),
|
||||
pure (a::args)
|
||||
) [],
|
||||
pure $ syntax.node ⟨k, args.reverse⟩
|
||||
|
||||
@[reducible] def seq : list parser → parser := node' none
|
||||
@[reducible] def node (k : syntax_node_kind) : list parser → parser := node' k
|
||||
@[reducible] def node (k : syntax_node_kind) : list parser → parser := node' (some k)
|
||||
|
||||
instance node'.tokens (k) (rs : list parser) [parser.has_tokens rs] : parser.has_tokens (node' k rs) :=
|
||||
⟨tokens rs⟩
|
||||
|
|
@ -39,6 +39,8 @@ instance node'.tokens (k) (rs : list parser) [parser.has_tokens rs] : parser.has
|
|||
instance node.view (k) (rs : list parser) [i : has_view k α] : parser.has_view (node k rs) α :=
|
||||
{ view := i.view, review := i.review }
|
||||
|
||||
-- Each parser combinator comes equipped with `has_view` and `has_tokens` instances
|
||||
|
||||
private def many1_aux (p : parser) : list syntax → nat → parser
|
||||
| as 0 := error "unreachable"
|
||||
| as (n+1) := do
|
||||
|
|
@ -66,6 +68,7 @@ instance many.tokens (r : parser) [parser.has_tokens r] : parser.has_tokens (man
|
|||
⟨tokens r⟩
|
||||
|
||||
instance many.view (r : parser) [has_view r α] : parser.has_view (many r) (list α) :=
|
||||
/- Remark: `many1.view` can handle empty list. -/
|
||||
{..many1.view r}
|
||||
|
||||
private def sep_by_aux (p : m syntax) (sep : parser) (allow_trailing_sep : bool) : bool → list syntax → nat → parser
|
||||
|
|
@ -164,15 +167,22 @@ instance longest_match.view (rs : list parser) : parser.has_view (longest_match
|
|||
/-- Parse a list `[p1, ..., pn]` of parsers as `p1 <|> ... <|> pn`.
|
||||
The result will be wrapped in a node with the the index of the successful
|
||||
parser as the name. -/
|
||||
def choice_aux : list parser → nat → parser
|
||||
| [] _ := error "choice: empty list"
|
||||
| (r::rs) i :=
|
||||
do { stx ← r,
|
||||
pure $ syntax.node ⟨some ⟨name.mk_numeral name.anonymous i⟩, [stx]⟩ }
|
||||
<|> choice_aux rs (i+1)
|
||||
|
||||
def choice (rs : list parser) : parser :=
|
||||
rs.enum.foldr
|
||||
(λ ⟨i, r⟩ r', (λ stx, syntax.node ⟨some ⟨name.mk_numeral name.anonymous i⟩, [stx]⟩) <$> r <|> r')
|
||||
-- use `foldr` so that any other error is preferred over this one
|
||||
(error "choice: empty list")
|
||||
choice_aux rs 0
|
||||
|
||||
instance choice.tokens (rs : list parser) [parser.has_tokens rs] : parser.has_tokens (choice rs) :=
|
||||
⟨tokens rs⟩
|
||||
|
||||
/- Remark: `choice` does not have `has_view` instance because we only use it at the pratt combinator
|
||||
which doesn't need the view. -/
|
||||
|
||||
instance try.tokens (r : parser) [parser.has_tokens r] : parser.has_tokens (try r) :=
|
||||
⟨tokens r⟩
|
||||
instance try.view (r : parser) [i : parser.has_view r α] : parser.has_view (try r) α :=
|
||||
|
|
|
|||
|
|
@ -256,10 +256,10 @@ namespace monad_parsec
|
|||
open parsec_t
|
||||
variables {m : Type → Type} [monad m] [monad_parsec μ m] [inhabited μ] {α β : Type}
|
||||
|
||||
@[inline] def error {α : Type} (unexpected : string) (expected : dlist string := dlist.empty) (it : option iterator := none) (custom : μ := default _) : m α :=
|
||||
def error {α : Type} (unexpected : string) (expected : dlist string := dlist.empty) (it : option iterator := none) (custom : μ := default _) : m α :=
|
||||
lift $ λ it', result.error { unexpected := unexpected, expected := expected, it := it.get_or_else it', custom := custom } ff
|
||||
|
||||
@[inline] def left_over : m iterator :=
|
||||
def left_over : m iterator :=
|
||||
lift $ λ it, result.mk_eps it it
|
||||
|
||||
/-- Return the number of characters left to be parsed. -/
|
||||
|
|
@ -310,7 +310,7 @@ do it ← left_over,
|
|||
if p c then error (repr c)
|
||||
else pure ()
|
||||
|
||||
@[inline] def eoi_error (it : iterator) : result μ α :=
|
||||
def eoi_error (it : iterator) : result μ α :=
|
||||
result.error { it := it, unexpected := "end of input", custom := default _ } ff
|
||||
|
||||
def curr : m char :=
|
||||
|
|
@ -360,7 +360,7 @@ This parser consumes no input if it fails (even if a partial match).
|
|||
Note: The behaviour of this parser is different to that the `string` parser in the Parsec_t Μ M Haskell library,
|
||||
as this one is all-or-nothing.
|
||||
-/
|
||||
@[inline] def str (s : string) : m string :=
|
||||
def str (s : string) : m string :=
|
||||
if s.is_empty then pure ""
|
||||
else lift $ λ it, match str_aux s.length s.mk_iterator it with
|
||||
| some it' := result.ok s it' none
|
||||
|
|
@ -377,7 +377,7 @@ def take (n : nat) : m string :=
|
|||
if n = 0 then pure ""
|
||||
else lift $ take_aux n ""
|
||||
|
||||
@[inline] private def mk_string_result (r : string) (it : iterator) : result μ string :=
|
||||
private def mk_string_result (r : string) (it : iterator) : result μ string :=
|
||||
if r.is_empty then result.mk_eps r it
|
||||
else result.ok r it none
|
||||
|
||||
|
|
@ -415,7 +415,7 @@ take_while (λ c, !p c)
|
|||
def take_until1 (p : char → bool) : m string :=
|
||||
take_while1 (λ c, !p c)
|
||||
|
||||
@[inline] private def mk_consumed_result (consumed : bool) (it : iterator) : result μ unit :=
|
||||
private def mk_consumed_result (consumed : bool) (it : iterator) : result μ unit :=
|
||||
if consumed then result.ok () it none
|
||||
else result.mk_eps () it
|
||||
|
||||
|
|
|
|||
|
|
@ -9,10 +9,6 @@ import init.lean.name init.lean.parser.parsec
|
|||
namespace lean
|
||||
namespace parser
|
||||
|
||||
/-- De Bruijn index relative to surrounding 'bind' macros -/
|
||||
@[reducible] def var_offset := ℕ
|
||||
@[reducible] def macro_scope_id := ℕ
|
||||
|
||||
--TODO(Sebastian): move
|
||||
structure substring :=
|
||||
(start : string.iterator)
|
||||
|
|
@ -36,11 +32,18 @@ structure syntax_node_kind :=
|
|||
|
||||
@[pattern] def choice : syntax_node_kind := ⟨`lean.parser.choice⟩
|
||||
|
||||
/-
|
||||
Parsers create `syntax_node`'s with the following property:
|
||||
- If `args` contains a `syntax.missing`, then all subsequent elements are also `syntax.missing`.
|
||||
- We believe the first argument in `args` is not `syntax.missing`. TODO: check this.
|
||||
|
||||
Remark: We do create `syntax_node`'s with an empty `args` field.
|
||||
-/
|
||||
structure syntax_node (syntax : Type) :=
|
||||
-- TODO: add `lean.parser.list` kind, and remove option. Then `none` = `lean.parser.seq`
|
||||
(kind : option syntax_node_kind) (args : list syntax)
|
||||
|
||||
inductive syntax
|
||||
/- any non-ident atom -/
|
||||
| atom (val : syntax_atom)
|
||||
| node (val : syntax_node syntax)
|
||||
| missing
|
||||
|
|
@ -48,9 +51,6 @@ inductive syntax
|
|||
instance : inhabited syntax :=
|
||||
⟨syntax.missing⟩
|
||||
|
||||
instance coe_string_syntax : has_coe string syntax :=
|
||||
⟨λ s, syntax.atom ⟨none, s⟩⟩
|
||||
|
||||
def substring.to_string (s : substring) : string :=
|
||||
(s.start.extract s.stop).get_or_else ""
|
||||
|
||||
|
|
@ -61,6 +61,9 @@ def is_of_kind (k : syntax_node_kind) : syntax → bool
|
|||
| (syntax.node ⟨some k', _⟩) := k.name = k'.name
|
||||
| _ := ff
|
||||
|
||||
-- Remark: this function must be updated whenever `ident` parser is modified.
|
||||
-- This function was defined before we had the `ident` parser.
|
||||
-- TODO: move it to the `ident` parser file and use the view defined there.
|
||||
private def ident_to_format : syntax → format
|
||||
| stx := option.get_or_else (do
|
||||
syntax.node ⟨_, [syntax.node ⟨_, [syntax.node ⟨some ⟨idx⟩, part⟩]⟩, suffix]⟩ ← pure stx | failure,
|
||||
|
|
@ -82,12 +85,15 @@ with to_format : syntax → format
|
|||
| stx@(node {kind := some kind, args := args, ..}) :=
|
||||
if kind.name = `lean.parser.ident
|
||||
then to_fmt "`" ++ ident_to_format stx
|
||||
else paren $ join_sep (to_fmt (kind.name.replace_prefix `lean.parser name.anonymous) :: to_format_lst args) line
|
||||
else let shorter_name := kind.name.replace_prefix `lean.parser name.anonymous
|
||||
in paren $ join_sep (to_fmt shorter_name :: to_format_lst args) line
|
||||
| missing := "<missing>"
|
||||
with to_format_lst : list syntax → list format
|
||||
| [] := []
|
||||
| (s::ss) := to_format s :: to_format_lst ss
|
||||
|
||||
/- Remark: the state `string.iterator` is the `source_info.trailing.stop` of the previous token,
|
||||
or the beginning of the string. -/
|
||||
private mutual def update_leading_aux, update_leading_lst
|
||||
with update_leading_aux : syntax → state string.iterator syntax
|
||||
| (atom a@{info := some info, ..}) := do
|
||||
|
|
@ -103,14 +109,27 @@ with update_leading_lst : list syntax → state string.iterator (list syntax)
|
|||
/-- Set `source_info.leading` according to the trailing stop of the preceding token.
|
||||
The result is a round-tripping syntax tree IF, in the input syntax tree,
|
||||
* all leading stops, atom contents, and trailing starts are correct
|
||||
* trailing stops are between the trailing start and the next leading stop. -/
|
||||
* trailing stops are between the trailing start and the next leading stop.
|
||||
|
||||
Remark: after parsing all `source_info.leading` fields are empty.
|
||||
The syntax argument is the output produced by the parser for `source`.
|
||||
This function "fixes" the `source.leanding` field.
|
||||
|
||||
Note that, the `source_info.trailing` fields are correct.
|
||||
The implementation of this function relies on this property. -/
|
||||
def update_leading (source : string) : syntax → syntax :=
|
||||
λ stx, prod.fst $ (update_leading_aux stx).run source.mk_iterator
|
||||
|
||||
/-- Retrieve the left-most leaf in the syntax tree -/
|
||||
def is_empty_node : syntax → bool
|
||||
| (node ⟨_, []⟩) := tt
|
||||
| _ := ff
|
||||
|
||||
/-- Retrieve the left-most leaf in the syntax tree. -/
|
||||
def get_head_atom : syntax → option syntax_atom
|
||||
| (atom a) := some a
|
||||
| (node ⟨_, n::_⟩) := n.get_head_atom
|
||||
-- TODO: handle case where `n` is an empty `syntax_node`
|
||||
-- We will have to create a mutual recursion here Arghhhh
|
||||
| (node ⟨_, n::ns⟩) := n.get_head_atom
|
||||
| _ := none
|
||||
|
||||
def get_pos (stx : syntax) : option parsec.position :=
|
||||
|
|
@ -118,13 +137,13 @@ do a ← stx.get_head_atom,
|
|||
i ← a.info,
|
||||
pure i.pos
|
||||
|
||||
def reprint_with_info : option source_info → string → string
|
||||
| (some info) inner := info.leading.to_string ++ inner ++ info.trailing.to_string
|
||||
| none inner := inner
|
||||
def reprint_atom : syntax_atom → string
|
||||
| ⟨some info, s⟩ := info.leading.to_string ++ s ++ info.trailing.to_string
|
||||
| ⟨none, s⟩ := s
|
||||
|
||||
mutual def reprint, reprint_lst
|
||||
with reprint : syntax → option string
|
||||
| (atom ⟨info, s⟩) := reprint_with_info info s
|
||||
| (atom a) := reprint_atom a
|
||||
| (node ⟨some k, ns⟩) :=
|
||||
if k.name = choice.name then match ns with
|
||||
-- should never happen
|
||||
|
|
|
|||
|
|
@ -26,10 +26,12 @@ do cfg ← read,
|
|||
|
||||
private def finish_comment_block_aux : nat → nat → basic_parser_m unit
|
||||
| nesting (n+1) :=
|
||||
str "/-" *> finish_comment_block_aux (nesting + 1) n <|>
|
||||
str "/-" *> finish_comment_block_aux (nesting + 1) n
|
||||
<|>
|
||||
str "-/" *>
|
||||
(if nesting = 1 then pure ()
|
||||
else finish_comment_block_aux (nesting - 1) n) <|>
|
||||
if nesting = 1 then pure ()
|
||||
else finish_comment_block_aux (nesting - 1) n
|
||||
<|>
|
||||
any *> finish_comment_block_aux nesting n
|
||||
| _ _ := error "unreachable"
|
||||
|
||||
|
|
@ -40,9 +42,11 @@ do r ← remaining,
|
|||
private def whitespace_aux : nat → basic_parser_m unit
|
||||
| (n+1) :=
|
||||
do whitespace,
|
||||
str "--" *> take_while' (≠ '\n') *> whitespace_aux n <|>
|
||||
str "--" *> take_while' (≠ '\n') *> whitespace_aux n
|
||||
<|>
|
||||
-- a "/--" doc comment is an actual token, not whitespace
|
||||
try (str "/-" *> not_followed_by (str "-")) *> finish_comment_block *> whitespace_aux n <|>
|
||||
try (str "/-" *> not_followed_by (str "-")) *> finish_comment_block *> whitespace_aux n
|
||||
<|>
|
||||
pure ()
|
||||
| 0 := error "unreachable"
|
||||
|
||||
|
|
@ -78,18 +82,23 @@ with update_trailing_lst : substring → list syntax → list syntax
|
|||
| trailing [stx] := [update_trailing trailing stx]
|
||||
| trailing (stx::stxs) := stx :: update_trailing_lst trailing stxs
|
||||
|
||||
@[inline] def with_trailing (stx : syntax) : m syntax :=
|
||||
def with_trailing (stx : syntax) : m syntax :=
|
||||
do -- TODO(Sebastian): less greedy, more natural whitespace assignment
|
||||
-- E.g. only read up to the next line break
|
||||
trailing ← lift $ as_substring $ whitespace,
|
||||
pure $ update_trailing trailing stx
|
||||
|
||||
def mk_raw_res (start stop : string.iterator) : syntax :=
|
||||
let ss : substring := ⟨start, stop⟩ in
|
||||
syntax.atom ⟨some {leading := ⟨start, start⟩, pos := start.offset, trailing := ⟨stop, stop⟩}, ss.to_string⟩
|
||||
|
||||
/-- Match an arbitrary parser and return the consumed string in a `syntax.atom`. -/
|
||||
@[inline] def raw {α : Type} (p : m α) (trailing_ws := ff) : parser :=
|
||||
try $ do
|
||||
it ← left_over,
|
||||
ss ← as_substring p,
|
||||
let stx := syntax.atom ⟨some {leading := ⟨it, it⟩, pos := it.offset, trailing := ⟨it, it⟩}, ss.to_string⟩,
|
||||
start ← left_over,
|
||||
p,
|
||||
stop ← left_over,
|
||||
let stx := mk_raw_res start stop,
|
||||
if trailing_ws then with_trailing stx else pure stx
|
||||
|
||||
instance raw.tokens {α} (p : m α) (t) : parser.has_tokens (raw p t : parser) := default _
|
||||
|
|
@ -100,12 +109,12 @@ instance raw.view {α} (p : m α) (t) : parser.has_view (raw p t : parser) (opti
|
|||
review := λ a, (syntax.atom <$> a).get_or_else syntax.missing }
|
||||
|
||||
/-- Like `raw (str s)`, but default to `s` in views. -/
|
||||
@[inline, derive has_tokens has_view]
|
||||
@[derive has_tokens has_view]
|
||||
def raw_str (s : string) (trailing_ws := ff) : parser :=
|
||||
raw (str s) trailing_ws
|
||||
|
||||
instance raw_str.view_default (s) (t) : parser.has_view_default (raw_str s t : parser) (option syntax_atom)
|
||||
(some {val := s}) := ⟨⟩
|
||||
instance raw_str.view_default (s) (t) : parser.has_view_default (raw_str s t : parser) (option syntax_atom) (some {val := s}) :=
|
||||
⟨⟩
|
||||
|
||||
end
|
||||
|
||||
|
|
@ -119,14 +128,14 @@ node_choice! ident_part {
|
|||
escaped: raw $ take_until1 is_id_end_escape,
|
||||
esc_end: raw_str id_end_escape.to_string,
|
||||
],
|
||||
default: lookahead (satisfy is_id_first) *> raw (take_while is_id_rest)
|
||||
default: raw $ satisfy is_id_first *> take_while is_id_rest
|
||||
}
|
||||
|
||||
@[derive has_tokens has_view]
|
||||
def ident_suffix.parser : rec_t unit syntax basic_parser_m syntax :=
|
||||
-- consume '.' only when followed by a character starting an ident_part
|
||||
try (lookahead (ch '.' *> (ch id_begin_escape *> pure () <|> satisfy is_id_first *> pure ()))) *>
|
||||
node! ident_suffix [«.»: raw_str ".", ident: recurse ()]
|
||||
try (lookahead (ch '.' *> (ch id_begin_escape <|> satisfy is_id_first)))
|
||||
*> node! ident_suffix [«.»: raw_str ".", ident: recurse ()]
|
||||
|
||||
def ident'' : rec_t unit syntax basic_parser_m syntax :=
|
||||
node! ident [part: monad_lift ident_part.parser, suffix: optional ident_suffix.parser]
|
||||
|
|
@ -277,8 +286,7 @@ instance symbol_or_ident.view (sym) : parser.has_view (symbol_or_ident sym : par
|
|||
def unicode_symbol (unicode ascii : string) (lbp := 0) : parser :=
|
||||
lift $ any_of [symbol unicode lbp, symbol ascii lbp]
|
||||
-- use unicode variant by default
|
||||
instance unicode_symbol.view_default (u a lbp) : parser.has_view_default (unicode_symbol u a lbp : parser) _
|
||||
(u : syntax) := ⟨⟩
|
||||
instance unicode_symbol.view_default (u a lbp) : parser.has_view_default (unicode_symbol u a lbp : parser) _ (syntax.atom ⟨none, u⟩) := ⟨⟩
|
||||
|
||||
end «parser»
|
||||
end lean
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ csimp_cfg::csimp_cfg() {
|
|||
m_inline_threshold = 1;
|
||||
m_float_cases_app = true;
|
||||
m_float_cases = true;
|
||||
m_float_cases_threshold = 40;
|
||||
m_float_cases_threshold = 10;
|
||||
m_inline_jp_threshold = 2;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue