feat(library/init/lean/parser/{pratt,level}): factor out pratt combinator, implement level parsers

This commit is contained in:
Sebastian Ullrich 2018-09-12 15:12:05 -07:00
parent b7c3f517a9
commit 98e09c274f
11 changed files with 200 additions and 57 deletions

View file

@ -39,7 +39,7 @@ instance : monad (rec_t α δ m) := infer_instance
instance [alternative m] : alternative (rec_t α δ m) := infer_instance
instance : has_monad_lift m (rec_t α δ m) := infer_instance
instance (ε) [monad_except ε m] : monad_except ε (rec_t α δ m) := infer_instance
instance (μ) [alternative m] [lean.parser.monad_parsec μ m] : lean.parser.monad_parsec μ (rec_t α δ m) :=
instance (μ) [lean.parser.monad_parsec μ m] : lean.parser.monad_parsec μ (rec_t α δ m) :=
infer_instance
end rec_t
@ -347,6 +347,16 @@ instance monad_lift.view {m' : Type → Type} [has_monad_lift_t m m'] (r : m syn
parser.has_view (monad_lift r : m' syntax) α :=
{..i}
instance seq_left.tokens {α : Type} (x : m α) (p : m syntax) [parser.has_tokens p] : parser.has_tokens (p <* x) :=
⟨tokens p⟩
instance seq_left.view {α β : Type} (x : m α) (p : m syntax) [i : parser.has_view p β] : parser.has_view (p <* x) β :=
{..i}
instance seq_right.tokens {α : Type} (x : m α) (p : m syntax) [parser.has_tokens p] : parser.has_tokens (x *> p) :=
⟨tokens p⟩
instance seq_right.view {α β : Type} (x : m α) (p : m syntax) [i : parser.has_view p β] : parser.has_view (x *> p) β :=
{..i}
end combinators
end «parser»
end lean

View file

@ -135,10 +135,14 @@ node! «reserve_mixfix» [
try ["reserve", kind: mixfix.kind.parser],
symbol: notation_spec.notation_symbol.parser]
@[derive parser.has_tokens parser.has_view]
def check.parser : command_parser :=
node! check ["#check", term: term.parser]
@[derive parser.has_tokens parser.has_view]
def command.parser : command_parser :=
any_of [open.parser, section.parser, universe.parser, notation.parser, reserve_notation.parser,
mixfix.parser, reserve_mixfix.parser] <?> "command"
mixfix.parser, reserve_mixfix.parser, check.parser] <?> "command"
end parser

View file

@ -0,0 +1,71 @@
/-
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Sebastian Ullrich
Level-level parsers
-/
prelude
import init.lean.parser.pratt
namespace lean
namespace parser
namespace level
open combinators parser.has_view monad_parsec
@[derive monad alternative monad_reader monad_state monad_parsec monad_except monad_rec monad_basic_read]
def level_parser_m := rec_t nat syntax basic_parser_m
abbreviation level_parser := level_parser_m syntax
/-- A term parser for a suffix or infix notation that accepts a preceding term. -/
@[derive monad alternative monad_reader monad_state monad_parsec monad_except monad_rec monad_basic_read]
def trailing_level_parser_m := reader_t syntax level_parser_m
abbreviation trailing_level_parser := trailing_level_parser_m syntax
/-- Access leading term -/
def get_leading : trailing_level_parser := read
instance : has_tokens get_leading := ⟨[]⟩
instance : has_view get_leading syntax := default _
@[derive parser.has_tokens parser.has_view]
def paren.parser : level_parser :=
node! «paren» ["(":max_prec, inner: recurse 0, ")"]
@[derive parser.has_tokens parser.has_view]
def leading.parser : level_parser :=
node_choice! leading {
max: raw_symbol "max",
imax: raw_symbol "imax",
hole: symbol "_" max_prec,
paren: paren.parser,
lit: number,
var: ident
}
@[derive parser.has_tokens parser.has_view]
def app.parser : trailing_level_parser :=
node! app [fn: get_leading, arg: recurse max_prec]
@[derive parser.has_tokens parser.has_view]
def add_lit.parser : trailing_level_parser :=
node! add_lit [lhs: get_leading, "+", rhs: number]
@[derive parser.has_tokens parser.has_view]
def trailing.parser : trailing_level_parser :=
node_choice! trailing {
app: app.parser,
add_lit: add_lit.parser
}
end level
def level.parser (rbp := 0) : basic_parser :=
pratt_parser level.leading.parser level.trailing.parser rbp <?> "universe term"
-- `[derive]` doesn't manage to derive these instances because of the parameter
instance level.parser.tokens (rbp) : has_tokens (level.parser rbp) :=
⟨has_tokens.tokens level.leading.parser ++ has_tokens.tokens level.trailing.parser⟩
instance level.parser.view (rbp) : has_view (level.parser rbp) syntax :=
default _
end parser
end lean

View file

@ -25,7 +25,7 @@ def module_parser_m := parser_t (coroutine unit syntax)
end
abbreviation module_parser := module_parser_m syntax
instance module_parser_m.lift_basic_parser_m : has_monad_lift_t basic_parser_m module_parser_m :=
instance module_parser_m.lift_basic_parser_m : monad_basic_read module_parser_m :=
{ monad_lift := λ α x, ⟨λ r, ⟨λ st it, pure (((x.run r).run st) it)⟩⟩ }
@[derive parser.has_view parser.has_tokens]

View file

@ -545,6 +545,9 @@ do it ← left_over,
def dbg (label : string) (p : m α) : m α :=
map (λ m' inst β, @parsec_t.dbg m' inst μ β label) p
def observing [monad_except (message μ) m] (p : m α) : m (except (message μ) α) :=
catch (except.ok <$> p) $ λ msg, pure (except.error msg)
end monad_parsec
namespace monad_parsec

View file

@ -0,0 +1,56 @@
/-
Copyright (c) 2018 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Sebastian Ullrich
A combinator for building Pratt parsers
-/
prelude
import init.lean.parser.token
namespace lean.parser
open monad_parsec combinators
variables {base_m : Type → Type}
variables [monad base_m] [monad_basic_read base_m] [monad_state parser_state base_m] [monad_parsec syntax base_m]
local notation `m` := rec_t nat syntax base_m
local notation `parser` := m syntax
def curr_lbp : m nat :=
do st ← get,
except.ok tk ← (monad_lift $ observing $ lookahead token) <* put st | pure 0 <* put st,
match tk with
| syntax.atom ⟨_, atomic_val.string sym⟩ := do
st ← get,
some ⟨_, tk_cfg⟩ ← pure (st.tokens.match_prefix sym.mk_iterator) | error "curr_lbp: unreachable",
pure tk_cfg.lbp
| syntax.node ⟨base10_lit, _⟩ := pure max_prec
| syntax.ident _ := pure max_prec
| _ := error "curr_lbp: unknown token kind"
private def trailing_loop (trailing : reader_t syntax m syntax) (rbp : nat) : nat → syntax → parser
| 0 _ := error "unreachable"
| (n+1) left := do
lbp ← curr_lbp,
if rbp < lbp then do
left ← trailing.run left,
trailing_loop n left
else
pure left
variables [monad_except (parsec.message syntax) base_m] [alternative base_m]
variables (leading : m syntax) (trailing : reader_t syntax m syntax)
def pratt_parser (rbp := 0) : base_m syntax :=
with_recurse rbp $ λ rbp, do
left ← leading,
n ← remaining,
trailing_loop trailing rbp (n+1) left
instance pratt_parser.tokens [has_tokens leading] [has_tokens trailing] : has_tokens (pratt_parser leading trailing) :=
⟨has_tokens.tokens leading ++ has_tokens.tokens trailing⟩
instance pratt_parser.view : has_view (pratt_parser leading trailing) syntax :=
default _
end lean.parser

View file

@ -6,7 +6,7 @@ Author: Sebastian Ullrich
Term-level parsers
-/
prelude
import init.lean.parser.token
import init.lean.parser.level
namespace lean
namespace parser
@ -25,58 +25,56 @@ abbreviation term_parser := term_parser_m syntax
def trailing_term_parser_m := reader_t syntax term_parser_m
abbreviation trailing_term_parser := trailing_term_parser_m syntax
namespace term
/-- Access leading term -/
def leading : trailing_term_parser := read
instance : has_tokens leading := ⟨[]⟩
instance : has_view leading syntax := default _
def get_leading : trailing_term_parser := read
instance : has_tokens get_leading := ⟨[]⟩
instance : has_view get_leading syntax := default _
@[derive parser.has_tokens parser.has_view]
def hole.parser : term_parser :=
node! hole [hole: symbol "_"]
node! hole [hole: symbol "_" max_prec]
@[derive parser.has_tokens parser.has_view]
def app.parser : trailing_term_parser :=
node! app [fn: leading, arg: recurse max_prec]
def sort.parser : term_parser :=
node_choice! sort {"Sort":max_prec, "Sort*":max_prec, "Type":max_prec, "Type*":max_prec}
@[derive parser.has_tokens parser.has_view]
def leading.parser :=
any_of [
hole.parser
ident,
number,
hole.parser,
sort.parser
]
@[derive parser.has_tokens parser.has_view]
def sort_app.parser : trailing_term_parser :=
do { l ← get_leading, guard (syntax_node_kind.has_view.view sort l).is_some } *>
node! sort_app [fn: get_leading, arg: monad_lift (level.parser max_prec)]
@[derive parser.has_tokens parser.has_view]
def app.parser : trailing_term_parser :=
node! app [fn: get_leading, arg: recurse max_prec]
@[derive parser.has_tokens parser.has_view]
def trailing.parser : trailing_term_parser :=
any_of [
sort_app.parser,
app.parser
]
/- Pratt parser -/
def curr_lbp : term_parser_m nat :=
do some tk ← monad_lift match_token | pure 0,
pure tk.lbp
def trailing_loop (rbp : nat) : nat → syntax → term_parser
| 0 _ := error "unreachable"
| (n+1) left := do
lbp ← curr_lbp,
if rbp < lbp then do
left ← trailing.parser.run left,
trailing_loop n left
else
pure left
end term
-- While term.parser does not actually read a command, it does share the same effect set
-- with command parsers, introducing the term-level recursion effect only for nested parsers
def term.parser : command_parser :=
with_recurse 0 $ λ rbp, do
left ← leading.parser,
n ← remaining,
trailing_loop rbp (n+1) left
def term.parser (rbp := 0) : command_parser :=
pratt_parser term.leading.parser term.trailing.parser rbp <?> "term"
instance term.parser.tokens : has_tokens term.parser :=
⟨has_tokens.tokens leading.parser ++ has_tokens.tokens trailing.parser⟩
instance term.parser.view : has_view term.parser syntax :=
-- `[derive]` doesn't manage to derive these instances because of the parameter
instance term.parser.tokens (rbp) : has_tokens (term.parser rbp) :=
⟨has_tokens.tokens term.leading.parser ++ has_tokens.tokens term.trailing.parser⟩
instance term.parser.view (rbp) : has_view (term.parser rbp) syntax :=
default _
end parser

View file

@ -65,7 +65,7 @@ do token_start ← parser_state.token_start <$> get,
lift whitespace,
it ← left_over,
a ← r,
-- TODO(Sebastian): less greedy, more natural whitespace assignement
-- TODO(Sebastian): less greedy, more natural whitespace assignment
-- E.g. only read up to the next line break
trailing ← lift whitespace,
it2 ← left_over,

View file

@ -3229,6 +3229,7 @@ expr elaborator::visit_node_macro(expr const & e, optional<expr> const & expecte
name fname = *get_name(mdata_data(r), "fname");
r = mdata_expr(r);
r = visit(r, expected_type);
synthesize_type_class_instances();
auto m = mk_metavar(mk_Type(), r);
auto inst = m_ctx.mk_class_instance(mk_app(mk_const(name{"lean", "parser", "has_view"}), exp, r, m));
if (!inst)
@ -3302,15 +3303,16 @@ expr elaborator::visit_node_choice_macro(expr const & e, optional<expr> const &
buffer<expr> new_args;
for (expr e = args; is_app(e); e = app_arg(e)) {
expr r = app_arg(app_fn(e));
name fname = *get_name(mdata_data(r), "fname");
std::string fname = "«" + get_name(mdata_data(r), "fname")->to_string() + "»";
r = mdata_expr(r);
auto m = mk_metavar(mk_Type(), r);
r = visit(r, expected_type);
synthesize_type_class_instances();
auto inst = m_ctx.mk_class_instance(mk_app(mk_const(name{"lean", "parser", "has_view"}), exp, r, m));
if (!inst)
throw elaborator_exception(e, sstream() << "Could not infer instance of parser.has_view for '" << r
<< "'");
struc << "| «" << fname << "» : " << instantiate_mvars(m) << " -> " << macro.to_string() << ".view\n";
struc << "| " << fname << " : " << instantiate_mvars(m) << " -> " << macro.to_string() << ".view\n";
view_cases << "| " << i << " := " << macro.to_string() << ".view." << fname << " <$> view (" << pp(r)
<< " : " << pp(exp) << ") stx\n";

View file

@ -50,7 +50,10 @@ end b"
-- should not be a parser error
#eval show_parse "section a end"
#eval show_parse "notation `Prop` := _"
universes u v
#check Type max u v -- eh
-- parsed as `Type (max) (u) (v)`, will fail on elaboration ("max: must have at least two arguments", "function expected at 'Type'", "unknown identifier 'u'/'v'")
#eval show_parse "#check Type max u v"
-- expansion example
#eval (do {

View file

@ -109,22 +109,20 @@ result:
result:
[(lean.parser.module [] [] [(lean.parser.section "section" [a] [] "end" [])])
(lean.parser.parser.eoi "")]
Type (max u v) : Type ((max u v)+1)
result:
[(lean.parser.module
[]
[]
[(lean.parser.notation
"notation"
(lean.parser.notation_spec
(1
(lean.parser.notation_spec.rules
[]
[(lean.parser.notation_spec.rule
(lean.parser.notation_spec.notation_symbol
(0 (lean.parser.notation_spec.notation_quoted_symbol "`" "Prop" "`" [])))
[])])))
":="
(lean.parser.hole "_"))])
[(lean.parser.check
"#check"
(lean.parser.term.app
(lean.parser.term.app
(lean.parser.term.sort_app
(lean.parser.term.sort (2 "Type"))
(lean.parser.level.leading (0 "max")))
u)
v))])
(lean.parser.parser.eoi "")]
(lean.parser.notation
"notation"
@ -148,12 +146,8 @@ result:
":"
(lean.parser.notation_spec.action_kind (0 (lean.parser.parser.base10_lit "10"))))])))])])))
":="
(lean.parser.hole "_"))
(lean.parser.term.hole "_"))
notation`+`:10 b:10 :=_
error at line 10, column 19:
expected "_"
error at line 11, column 26:
expected "_"
error at line 85, column 0:
expected command
partial syntax tree:
@ -171,7 +165,9 @@ partial syntax tree:
(0 (lean.parser.notation_spec.notation_quoted_symbol "`" "Prop" "`" [])))
[])])))
":="
(lean.parser.hole <missing>))
(lean.parser.term.sort_app
(lean.parser.term.sort (0 "Sort"))
(lean.parser.level.leading (4 (lean.parser.parser.base10_lit "0")))))
(lean.parser.notation
"notation"
(lean.parser.notation_spec
@ -194,7 +190,7 @@ partial syntax tree:
":"
(lean.parser.notation_spec.action_kind (0 (lean.parser.parser.base10_lit "0"))))])))])])))
":="
(lean.parser.hole <missing>))
(lean.parser.term.app f a))
(lean.parser.reserve_mixfix
["reserve" (lean.parser.mixfix.kind (0 "prefix"))]
(lean.parser.notation_spec.notation_symbol