lean4-htt/library/init/lean/parser/syntax.lean

146 lines
5.1 KiB
Text

/-
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Sebastian Ullrich
-/
prelude
import init.lean.name init.lean.parser.parsec
namespace lean
namespace parser
--TODO(Sebastian): move
structure substring :=
(start : string.iterator)
(stop : string.iterator)
structure source_info :=
/- Will be inferred after parsing by `syntax.update_leading`. During parsing,
it is not at all clear what the preceding token was, especially with backtracking. -/
(leading : substring)
(pos : parsec.position)
(trailing : substring)
structure syntax_atom :=
(info : option source_info := none) (val : string)
/-- A simple wrapper that should remind you to use the static declaration instead
of hard-coding the node name. -/
structure syntax_node_kind :=
-- should be equal to the name of the declaration this structure instance was bound to
(name : name)
/-- Signifies ambiguous syntax to be disambiguated by the elaborator. Should have at least two children.
This node kind is special-cased by `syntax.reprint` since its children's outputs should not be concatenated. -/
@[pattern] def choice : syntax_node_kind := ⟨`lean.parser.choice⟩
/-- A nondescriptive kind that can be used for merely grouping syntax trees into a node.
This node kind is special-cased by `syntax.format` to be printed as brackets `[...]` without a node kind. -/
@[pattern] def no_kind : syntax_node_kind := ⟨`lean.parser.no_kind⟩
/-
Parsers create `syntax_node`'s with the following properties (see implementation of `combinators.node`):
- If `args` contains a `syntax.missing`, then all subsequent elements are also `syntax.missing`.
- The first argument in `args` is not `syntax.missing`
Remark: We do create `syntax_node`'s with an empty `args` field (e.g. for representing `option.none`).
-/
structure syntax_node (syntax : Type) :=
(kind : syntax_node_kind) (args : list syntax)
inductive syntax
| atom (val : syntax_atom)
| node (val : syntax_node syntax)
| missing
instance : inhabited syntax :=
⟨syntax.missing⟩
def substring.to_string (s : substring) : string :=
(s.start.extract s.stop).get_or_else ""
namespace syntax
open lean.format
protected def list (args : list syntax) :=
syntax.node ⟨no_kind, args⟩
def is_of_kind (k : syntax_node_kind) : syntax → bool
| (syntax.node ⟨k', _⟩) := k.name = k'.name
| _ := ff
/- Remark: the state `string.iterator` is the `source_info.trailing.stop` of the previous token,
or the beginning of the string. -/
private mutual def update_leading_aux, update_leading_lst
with update_leading_aux : syntax → state string.iterator syntax
| (atom a@{info := some info, ..}) := do
last ← get,
put info.trailing.stop,
pure $ atom {a with info := some {info with leading := ⟨last, last.nextn (info.pos - last.offset)⟩}}
| (node n) := do args ← update_leading_lst n.args, pure $ node {n with args := args}
| stx := pure stx
with update_leading_lst : list syntax → state string.iterator (list syntax)
| [] := pure []
| (s::ss) := list.cons <$> update_leading_aux s <*> update_leading_lst ss
/-- Set `source_info.leading` according to the trailing stop of the preceding token.
The result is a round-tripping syntax tree IF, in the input syntax tree,
* all leading stops, atom contents, and trailing starts are correct
* trailing stops are between the trailing start and the next leading stop.
Remark: after parsing all `source_info.leading` fields are empty.
The syntax argument is the output produced by the parser for `source`.
This function "fixes" the `source.leanding` field.
Note that, the `source_info.trailing` fields are correct.
The implementation of this function relies on this property. -/
def update_leading (source : string) : syntax → syntax :=
λ stx, prod.fst $ (update_leading_aux stx).run source.mk_iterator
def is_empty_node : syntax → bool
| (node ⟨_, []⟩) := tt
| _ := ff
/-- Retrieve the left-most leaf in the syntax tree. -/
def get_head_atom : syntax → option syntax_atom
| (atom a) := some a
-- TODO: handle case where `n` is an empty `syntax_node`
-- We will have to create a mutual recursion here Arghhhh
| (node ⟨_, n::ns⟩) := n.get_head_atom
| _ := none
def get_pos (stx : syntax) : option parsec.position :=
do a ← stx.get_head_atom,
i ← a.info,
pure i.pos
def reprint_atom : syntax_atom → string
| ⟨some info, s⟩ := info.leading.to_string ++ s ++ info.trailing.to_string
| ⟨none, s⟩ := s
mutual def reprint, reprint_lst
with reprint : syntax → option string
| (atom a) := reprint_atom a
| (node ⟨k, ns⟩) :=
if k.name = choice.name then match ns with
-- should never happen
| [] := failure
-- check that every choice prints the same
| n::ns := do
s ← reprint n,
ss ← reprint_lst ns,
guard $ ss.all (= s),
pure s
else string.join <$> reprint_lst ns
| missing := ""
with reprint_lst : list syntax → option (list string)
| [] := pure []
| (n::ns) := do
s ← reprint n,
ss ← reprint_lst ns,
pure $ s::ss
end syntax
end parser
end lean