lean4-htt/library/init/lean/parser/syntax.lean
2018-10-11 15:54:57 -07:00

172 lines
6.2 KiB
Text

/-
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Sebastian Ullrich
-/
prelude
import init.lean.name init.lean.parser.parsec
namespace lean
namespace parser
--TODO(Sebastian): move
structure substring :=
(start : string.iterator)
(stop : string.iterator)
structure source_info :=
/- Will be inferred after parsing by `syntax.update_leading`. During parsing,
it is not at all clear what the preceding token was, especially with backtracking. -/
(leading : substring)
(pos : parsec.position)
(trailing : substring)
structure syntax_atom :=
(info : option source_info := none) (val : string)
/-- A simple wrapper that should remind you to use the static declaration instead
of hard-coding the node name. -/
structure syntax_node_kind :=
-- should be equal to the name of the declaration this structure instance was bound to
(name : name)
@[pattern] def choice : syntax_node_kind := ⟨`lean.parser.choice⟩
/-
Parsers create `syntax_node`'s with the following property:
- If `args` contains a `syntax.missing`, then all subsequent elements are also `syntax.missing`.
- We believe the first argument in `args` is not `syntax.missing`. TODO: check this.
Remark: We do create `syntax_node`'s with an empty `args` field.
-/
structure syntax_node (syntax : Type) :=
-- TODO: add `lean.parser.list` kind, and remove option. Then `none` = `lean.parser.seq`
(kind : option syntax_node_kind) (args : list syntax)
inductive syntax
| atom (val : syntax_atom)
| node (val : syntax_node syntax)
| missing
instance : inhabited syntax :=
⟨syntax.missing⟩
def substring.to_string (s : substring) : string :=
(s.start.extract s.stop).get_or_else ""
namespace syntax
open lean.format
def is_of_kind (k : syntax_node_kind) : syntax → bool
| (syntax.node ⟨some k', _⟩) := k.name = k'.name
| _ := ff
-- Remark: this function must be updated whenever `ident` parser is modified.
-- This function was defined before we had the `ident` parser.
-- TODO: move it to the `ident` parser file and use the view defined there.
private def ident_to_format : syntax → format
| stx := option.get_or_else (do
syntax.node ⟨_, [syntax.node ⟨_, [syntax.node ⟨some ⟨idx⟩, part⟩]⟩, suffix]⟩ ← pure stx | failure,
part ← match idx, part with
| name.mk_numeral name.anonymous 0, [syntax.node ⟨_, [_, syntax.atom ⟨_, s⟩, _]⟩] := pure $ to_fmt "«" ++ s ++ "»"
| name.mk_numeral name.anonymous 1, [syntax.atom ⟨_, s⟩] := pure $ s
| _, _ := failure,
match suffix with
| syntax.node ⟨_, []⟩ := pure $ to_fmt part
| syntax.node ⟨_, [syntax.node ⟨_, [_, id]⟩]⟩ := pure $ to_fmt part ++ "." ++ ident_to_format id
| _ := failure
) "syntax.to_format: unexpected `ident` node content"
protected mutual def to_format, to_format_lst
with to_format : syntax → format
| (atom ⟨_, s⟩) := to_fmt $ repr s
| (node {kind := none, args := args, ..}) :=
sbracket $ join_sep (to_format_lst args) line
| stx@(node {kind := some kind, args := args, ..}) :=
if kind.name = `lean.parser.ident
then to_fmt "`" ++ ident_to_format stx
else let shorter_name := kind.name.replace_prefix `lean.parser name.anonymous
in paren $ join_sep (to_fmt shorter_name :: to_format_lst args) line
| missing := "<missing>"
with to_format_lst : list syntax → list format
| [] := []
| (s::ss) := to_format s :: to_format_lst ss
/- Remark: the state `string.iterator` is the `source_info.trailing.stop` of the previous token,
or the beginning of the string. -/
private mutual def update_leading_aux, update_leading_lst
with update_leading_aux : syntax → state string.iterator syntax
| (atom a@{info := some info, ..}) := do
last ← get,
put info.trailing.stop,
pure $ atom {a with info := some {info with leading := ⟨last, last.nextn (info.pos - last.offset)⟩}}
| (node n) := do args ← update_leading_lst n.args, pure $ node {n with args := args}
| stx := pure stx
with update_leading_lst : list syntax → state string.iterator (list syntax)
| [] := pure []
| (s::ss) := list.cons <$> update_leading_aux s <*> update_leading_lst ss
/-- Set `source_info.leading` according to the trailing stop of the preceding token.
The result is a round-tripping syntax tree IF, in the input syntax tree,
* all leading stops, atom contents, and trailing starts are correct
* trailing stops are between the trailing start and the next leading stop.
Remark: after parsing all `source_info.leading` fields are empty.
The syntax argument is the output produced by the parser for `source`.
This function "fixes" the `source.leanding` field.
Note that, the `source_info.trailing` fields are correct.
The implementation of this function relies on this property. -/
def update_leading (source : string) : syntax → syntax :=
λ stx, prod.fst $ (update_leading_aux stx).run source.mk_iterator
def is_empty_node : syntax → bool
| (node ⟨_, []⟩) := tt
| _ := ff
/-- Retrieve the left-most leaf in the syntax tree. -/
def get_head_atom : syntax → option syntax_atom
| (atom a) := some a
-- TODO: handle case where `n` is an empty `syntax_node`
-- We will have to create a mutual recursion here Arghhhh
| (node ⟨_, n::ns⟩) := n.get_head_atom
| _ := none
def get_pos (stx : syntax) : option parsec.position :=
do a ← stx.get_head_atom,
i ← a.info,
pure i.pos
def reprint_atom : syntax_atom → string
| ⟨some info, s⟩ := info.leading.to_string ++ s ++ info.trailing.to_string
| ⟨none, s⟩ := s
mutual def reprint, reprint_lst
with reprint : syntax → option string
| (atom a) := reprint_atom a
| (node ⟨some k, ns⟩) :=
if k.name = choice.name then match ns with
-- should never happen
| [] := failure
-- check that every choice prints the same
| n::ns := do
s ← reprint n,
ss ← reprint_lst ns,
guard $ ss.all (= s),
pure s
else string.join <$> reprint_lst ns
| (node ⟨_, ns⟩) := string.join <$> reprint_lst ns
| missing := ""
with reprint_lst : list syntax → option (list string)
| [] := pure []
| (n::ns) := do
s ← reprint n,
ss ← reprint_lst ns,
pure $ s::ss
end syntax
instance : has_to_format syntax := ⟨syntax.to_format⟩
instance : has_to_string syntax := ⟨to_string ∘ to_fmt⟩
end parser
end lean