From 98e09c274f5d28ee775e8d1b40b3fe54e307efe1 Mon Sep 17 00:00:00 2001 From: Sebastian Ullrich Date: Wed, 12 Sep 2018 15:12:05 -0700 Subject: [PATCH] feat(library/init/lean/parser/{pratt,level}): factor out `pratt` combinator, implement level parsers --- library/init/lean/parser/basic.lean | 12 ++++- library/init/lean/parser/command.lean | 6 ++- library/init/lean/parser/level.lean | 71 +++++++++++++++++++++++++++ library/init/lean/parser/module.lean | 2 +- library/init/lean/parser/parsec.lean | 3 ++ library/init/lean/parser/pratt.lean | 56 +++++++++++++++++++++ library/init/lean/parser/term.lean | 60 +++++++++++----------- library/init/lean/parser/token.lean | 2 +- src/frontends/lean/elaborator.cpp | 6 ++- tests/lean/parser1.lean | 5 +- tests/lean/parser1.lean.expected.out | 34 ++++++------- 11 files changed, 200 insertions(+), 57 deletions(-) create mode 100644 library/init/lean/parser/level.lean create mode 100644 library/init/lean/parser/pratt.lean diff --git a/library/init/lean/parser/basic.lean b/library/init/lean/parser/basic.lean index bdeecf8784..7135a6e10c 100644 --- a/library/init/lean/parser/basic.lean +++ b/library/init/lean/parser/basic.lean @@ -39,7 +39,7 @@ instance : monad (rec_t α δ m) := infer_instance instance [alternative m] : alternative (rec_t α δ m) := infer_instance instance : has_monad_lift m (rec_t α δ m) := infer_instance instance (ε) [monad_except ε m] : monad_except ε (rec_t α δ m) := infer_instance -instance (μ) [alternative m] [lean.parser.monad_parsec μ m] : lean.parser.monad_parsec μ (rec_t α δ m) := +instance (μ) [lean.parser.monad_parsec μ m] : lean.parser.monad_parsec μ (rec_t α δ m) := infer_instance end rec_t @@ -347,6 +347,16 @@ instance monad_lift.view {m' : Type → Type} [has_monad_lift_t m m'] (r : m syn parser.has_view (monad_lift r : m' syntax) α := {..i} +instance seq_left.tokens {α : Type} (x : m α) (p : m syntax) [parser.has_tokens p] : parser.has_tokens (p <* x) := +⟨tokens p⟩ +instance seq_left.view {α β : Type} (x : m α) (p : m syntax) [i : parser.has_view p β] : parser.has_view (p <* x) β := +{..i} + +instance seq_right.tokens {α : Type} (x : m α) (p : m syntax) [parser.has_tokens p] : parser.has_tokens (x *> p) := +⟨tokens p⟩ +instance seq_right.view {α β : Type} (x : m α) (p : m syntax) [i : parser.has_view p β] : parser.has_view (x *> p) β := +{..i} + end combinators end «parser» end lean diff --git a/library/init/lean/parser/command.lean b/library/init/lean/parser/command.lean index 24babc0d63..70b5e063ab 100644 --- a/library/init/lean/parser/command.lean +++ b/library/init/lean/parser/command.lean @@ -135,10 +135,14 @@ node! «reserve_mixfix» [ try ["reserve", kind: mixfix.kind.parser], symbol: notation_spec.notation_symbol.parser] +@[derive parser.has_tokens parser.has_view] +def check.parser : command_parser := +node! check ["#check", term: term.parser] + @[derive parser.has_tokens parser.has_view] def command.parser : command_parser := any_of [open.parser, section.parser, universe.parser, notation.parser, reserve_notation.parser, - mixfix.parser, reserve_mixfix.parser] "command" + mixfix.parser, reserve_mixfix.parser, check.parser] "command" end parser diff --git a/library/init/lean/parser/level.lean b/library/init/lean/parser/level.lean new file mode 100644 index 0000000000..3081df44e0 --- /dev/null +++ b/library/init/lean/parser/level.lean @@ -0,0 +1,71 @@ +/- +Copyright (c) 2018 Microsoft Corporation. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: Sebastian Ullrich + +Level-level parsers +-/ +prelude +import init.lean.parser.pratt + +namespace lean +namespace parser +namespace level +open combinators parser.has_view monad_parsec + +@[derive monad alternative monad_reader monad_state monad_parsec monad_except monad_rec monad_basic_read] +def level_parser_m := rec_t nat syntax basic_parser_m +abbreviation level_parser := level_parser_m syntax + +/-- A term parser for a suffix or infix notation that accepts a preceding term. -/ +@[derive monad alternative monad_reader monad_state monad_parsec monad_except monad_rec monad_basic_read] +def trailing_level_parser_m := reader_t syntax level_parser_m +abbreviation trailing_level_parser := trailing_level_parser_m syntax + +/-- Access leading term -/ +def get_leading : trailing_level_parser := read +instance : has_tokens get_leading := ⟨[]⟩ +instance : has_view get_leading syntax := default _ + +@[derive parser.has_tokens parser.has_view] +def paren.parser : level_parser := +node! «paren» ["(":max_prec, inner: recurse 0, ")"] + +@[derive parser.has_tokens parser.has_view] +def leading.parser : level_parser := +node_choice! leading { + max: raw_symbol "max", + imax: raw_symbol "imax", + hole: symbol "_" max_prec, + paren: paren.parser, + lit: number, + var: ident +} + +@[derive parser.has_tokens parser.has_view] +def app.parser : trailing_level_parser := +node! app [fn: get_leading, arg: recurse max_prec] + +@[derive parser.has_tokens parser.has_view] +def add_lit.parser : trailing_level_parser := +node! add_lit [lhs: get_leading, "+", rhs: number] + +@[derive parser.has_tokens parser.has_view] +def trailing.parser : trailing_level_parser := +node_choice! trailing { + app: app.parser, + add_lit: add_lit.parser +} +end level + +def level.parser (rbp := 0) : basic_parser := +pratt_parser level.leading.parser level.trailing.parser rbp "universe term" + +-- `[derive]` doesn't manage to derive these instances because of the parameter +instance level.parser.tokens (rbp) : has_tokens (level.parser rbp) := +⟨has_tokens.tokens level.leading.parser ++ has_tokens.tokens level.trailing.parser⟩ +instance level.parser.view (rbp) : has_view (level.parser rbp) syntax := +default _ + +end parser +end lean diff --git a/library/init/lean/parser/module.lean b/library/init/lean/parser/module.lean index d67f9ff15b..25b5befa2b 100644 --- a/library/init/lean/parser/module.lean +++ b/library/init/lean/parser/module.lean @@ -25,7 +25,7 @@ def module_parser_m := parser_t (coroutine unit syntax) end abbreviation module_parser := module_parser_m syntax -instance module_parser_m.lift_basic_parser_m : has_monad_lift_t basic_parser_m module_parser_m := +instance module_parser_m.lift_basic_parser_m : monad_basic_read module_parser_m := { monad_lift := λ α x, ⟨λ r, ⟨λ st it, pure (((x.run r).run st) it)⟩⟩ } @[derive parser.has_view parser.has_tokens] diff --git a/library/init/lean/parser/parsec.lean b/library/init/lean/parser/parsec.lean index 4b49a1fb84..c836889fec 100644 --- a/library/init/lean/parser/parsec.lean +++ b/library/init/lean/parser/parsec.lean @@ -545,6 +545,9 @@ do it ← left_over, def dbg (label : string) (p : m α) : m α := map (λ m' inst β, @parsec_t.dbg m' inst μ β label) p +def observing [monad_except (message μ) m] (p : m α) : m (except (message μ) α) := +catch (except.ok <$> p) $ λ msg, pure (except.error msg) + end monad_parsec namespace monad_parsec diff --git a/library/init/lean/parser/pratt.lean b/library/init/lean/parser/pratt.lean new file mode 100644 index 0000000000..2023cd36eb --- /dev/null +++ b/library/init/lean/parser/pratt.lean @@ -0,0 +1,56 @@ +/- +Copyright (c) 2018 Microsoft Corporation. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: Sebastian Ullrich + +A combinator for building Pratt parsers +-/ +prelude +import init.lean.parser.token + +namespace lean.parser +open monad_parsec combinators + +variables {base_m : Type → Type} +variables [monad base_m] [monad_basic_read base_m] [monad_state parser_state base_m] [monad_parsec syntax base_m] + +local notation `m` := rec_t nat syntax base_m +local notation `parser` := m syntax + +def curr_lbp : m nat := +do st ← get, + except.ok tk ← (monad_lift $ observing $ lookahead token) <* put st | pure 0 <* put st, + match tk with + | syntax.atom ⟨_, atomic_val.string sym⟩ := do + st ← get, + some ⟨_, tk_cfg⟩ ← pure (st.tokens.match_prefix sym.mk_iterator) | error "curr_lbp: unreachable", + pure tk_cfg.lbp + | syntax.node ⟨base10_lit, _⟩ := pure max_prec + | syntax.ident _ := pure max_prec + | _ := error "curr_lbp: unknown token kind" + +private def trailing_loop (trailing : reader_t syntax m syntax) (rbp : nat) : nat → syntax → parser +| 0 _ := error "unreachable" +| (n+1) left := do +lbp ← curr_lbp, +if rbp < lbp then do + left ← trailing.run left, + trailing_loop n left +else + pure left + +variables [monad_except (parsec.message syntax) base_m] [alternative base_m] +variables (leading : m syntax) (trailing : reader_t syntax m syntax) + +def pratt_parser (rbp := 0) : base_m syntax := +with_recurse rbp $ λ rbp, do +left ← leading, +n ← remaining, +trailing_loop trailing rbp (n+1) left + +instance pratt_parser.tokens [has_tokens leading] [has_tokens trailing] : has_tokens (pratt_parser leading trailing) := +⟨has_tokens.tokens leading ++ has_tokens.tokens trailing⟩ +instance pratt_parser.view : has_view (pratt_parser leading trailing) syntax := +default _ + +end lean.parser diff --git a/library/init/lean/parser/term.lean b/library/init/lean/parser/term.lean index 106a7a0baf..eb7fce3610 100644 --- a/library/init/lean/parser/term.lean +++ b/library/init/lean/parser/term.lean @@ -6,7 +6,7 @@ Author: Sebastian Ullrich Term-level parsers -/ prelude -import init.lean.parser.token +import init.lean.parser.level namespace lean namespace parser @@ -25,58 +25,56 @@ abbreviation term_parser := term_parser_m syntax def trailing_term_parser_m := reader_t syntax term_parser_m abbreviation trailing_term_parser := trailing_term_parser_m syntax +namespace term /-- Access leading term -/ -def leading : trailing_term_parser := read -instance : has_tokens leading := ⟨[]⟩ -instance : has_view leading syntax := default _ +def get_leading : trailing_term_parser := read +instance : has_tokens get_leading := ⟨[]⟩ +instance : has_view get_leading syntax := default _ @[derive parser.has_tokens parser.has_view] def hole.parser : term_parser := -node! hole [hole: symbol "_"] +node! hole [hole: symbol "_" max_prec] @[derive parser.has_tokens parser.has_view] -def app.parser : trailing_term_parser := -node! app [fn: leading, arg: recurse max_prec] +def sort.parser : term_parser := +node_choice! sort {"Sort":max_prec, "Sort*":max_prec, "Type":max_prec, "Type*":max_prec} @[derive parser.has_tokens parser.has_view] def leading.parser := any_of [ - hole.parser + ident, + number, + hole.parser, + sort.parser ] +@[derive parser.has_tokens parser.has_view] +def sort_app.parser : trailing_term_parser := +do { l ← get_leading, guard (syntax_node_kind.has_view.view sort l).is_some } *> +node! sort_app [fn: get_leading, arg: monad_lift (level.parser max_prec)] + +@[derive parser.has_tokens parser.has_view] +def app.parser : trailing_term_parser := +node! app [fn: get_leading, arg: recurse max_prec] + @[derive parser.has_tokens parser.has_view] def trailing.parser : trailing_term_parser := any_of [ + sort_app.parser, app.parser ] -/- Pratt parser -/ - -def curr_lbp : term_parser_m nat := -do some tk ← monad_lift match_token | pure 0, - pure tk.lbp - -def trailing_loop (rbp : nat) : nat → syntax → term_parser -| 0 _ := error "unreachable" -| (n+1) left := do - lbp ← curr_lbp, - if rbp < lbp then do - left ← trailing.parser.run left, - trailing_loop n left - else - pure left +end term -- While term.parser does not actually read a command, it does share the same effect set -- with command parsers, introducing the term-level recursion effect only for nested parsers -def term.parser : command_parser := -with_recurse 0 $ λ rbp, do - left ← leading.parser, - n ← remaining, - trailing_loop rbp (n+1) left +def term.parser (rbp := 0) : command_parser := +pratt_parser term.leading.parser term.trailing.parser rbp "term" -instance term.parser.tokens : has_tokens term.parser := -⟨has_tokens.tokens leading.parser ++ has_tokens.tokens trailing.parser⟩ -instance term.parser.view : has_view term.parser syntax := +-- `[derive]` doesn't manage to derive these instances because of the parameter +instance term.parser.tokens (rbp) : has_tokens (term.parser rbp) := +⟨has_tokens.tokens term.leading.parser ++ has_tokens.tokens term.trailing.parser⟩ +instance term.parser.view (rbp) : has_view (term.parser rbp) syntax := default _ end parser diff --git a/library/init/lean/parser/token.lean b/library/init/lean/parser/token.lean index bccc9ab2e9..120927201b 100644 --- a/library/init/lean/parser/token.lean +++ b/library/init/lean/parser/token.lean @@ -65,7 +65,7 @@ do token_start ← parser_state.token_start <$> get, lift whitespace, it ← left_over, a ← r, - -- TODO(Sebastian): less greedy, more natural whitespace assignement + -- TODO(Sebastian): less greedy, more natural whitespace assignment -- E.g. only read up to the next line break trailing ← lift whitespace, it2 ← left_over, diff --git a/src/frontends/lean/elaborator.cpp b/src/frontends/lean/elaborator.cpp index 106e4b7f1f..7533765458 100644 --- a/src/frontends/lean/elaborator.cpp +++ b/src/frontends/lean/elaborator.cpp @@ -3229,6 +3229,7 @@ expr elaborator::visit_node_macro(expr const & e, optional const & expecte name fname = *get_name(mdata_data(r), "fname"); r = mdata_expr(r); r = visit(r, expected_type); + synthesize_type_class_instances(); auto m = mk_metavar(mk_Type(), r); auto inst = m_ctx.mk_class_instance(mk_app(mk_const(name{"lean", "parser", "has_view"}), exp, r, m)); if (!inst) @@ -3302,15 +3303,16 @@ expr elaborator::visit_node_choice_macro(expr const & e, optional const & buffer new_args; for (expr e = args; is_app(e); e = app_arg(e)) { expr r = app_arg(app_fn(e)); - name fname = *get_name(mdata_data(r), "fname"); + std::string fname = "«" + get_name(mdata_data(r), "fname")->to_string() + "»"; r = mdata_expr(r); auto m = mk_metavar(mk_Type(), r); r = visit(r, expected_type); + synthesize_type_class_instances(); auto inst = m_ctx.mk_class_instance(mk_app(mk_const(name{"lean", "parser", "has_view"}), exp, r, m)); if (!inst) throw elaborator_exception(e, sstream() << "Could not infer instance of parser.has_view for '" << r << "'"); - struc << "| «" << fname << "» : " << instantiate_mvars(m) << " -> " << macro.to_string() << ".view\n"; + struc << "| " << fname << " : " << instantiate_mvars(m) << " -> " << macro.to_string() << ".view\n"; view_cases << "| " << i << " := " << macro.to_string() << ".view." << fname << " <$> view (" << pp(r) << " : " << pp(exp) << ") stx\n"; diff --git a/tests/lean/parser1.lean b/tests/lean/parser1.lean index 504983a328..db08f0df5d 100644 --- a/tests/lean/parser1.lean +++ b/tests/lean/parser1.lean @@ -50,7 +50,10 @@ end b" -- should not be a parser error #eval show_parse "section a end" -#eval show_parse "notation `Prop` := _" +universes u v +#check Type max u v -- eh +-- parsed as `Type (max) (u) (v)`, will fail on elaboration ("max: must have at least two arguments", "function expected at 'Type'", "unknown identifier 'u'/'v'") +#eval show_parse "#check Type max u v" -- expansion example #eval (do { diff --git a/tests/lean/parser1.lean.expected.out b/tests/lean/parser1.lean.expected.out index 18252805c3..48c654cc53 100644 --- a/tests/lean/parser1.lean.expected.out +++ b/tests/lean/parser1.lean.expected.out @@ -109,22 +109,20 @@ result: result: [(lean.parser.module [] [] [(lean.parser.section "section" [a] [] "end" [])]) (lean.parser.parser.eoi "")] +Type (max u v) : Type ((max u v)+1) result: [(lean.parser.module [] [] - [(lean.parser.notation - "notation" - (lean.parser.notation_spec - (1 - (lean.parser.notation_spec.rules - [] - [(lean.parser.notation_spec.rule - (lean.parser.notation_spec.notation_symbol - (0 (lean.parser.notation_spec.notation_quoted_symbol "`" "Prop" "`" []))) - [])]))) - ":=" - (lean.parser.hole "_"))]) + [(lean.parser.check + "#check" + (lean.parser.term.app + (lean.parser.term.app + (lean.parser.term.sort_app + (lean.parser.term.sort (2 "Type")) + (lean.parser.level.leading (0 "max"))) + u) + v))]) (lean.parser.parser.eoi "")] (lean.parser.notation "notation" @@ -148,12 +146,8 @@ result: ":" (lean.parser.notation_spec.action_kind (0 (lean.parser.parser.base10_lit "10"))))])))])]))) ":=" - (lean.parser.hole "_")) + (lean.parser.term.hole "_")) notation`+`:10 b:10 :=_ -error at line 10, column 19: -expected "_" -error at line 11, column 26: -expected "_" error at line 85, column 0: expected command partial syntax tree: @@ -171,7 +165,9 @@ partial syntax tree: (0 (lean.parser.notation_spec.notation_quoted_symbol "`" "Prop" "`" []))) [])]))) ":=" - (lean.parser.hole )) + (lean.parser.term.sort_app + (lean.parser.term.sort (0 "Sort")) + (lean.parser.level.leading (4 (lean.parser.parser.base10_lit "0"))))) (lean.parser.notation "notation" (lean.parser.notation_spec @@ -194,7 +190,7 @@ partial syntax tree: ":" (lean.parser.notation_spec.action_kind (0 (lean.parser.parser.base10_lit "0"))))])))])]))) ":=" - (lean.parser.hole )) + (lean.parser.term.app f a)) (lean.parser.reserve_mixfix ["reserve" (lean.parser.mixfix.kind (0 "prefix"))] (lean.parser.notation_spec.notation_symbol