From a820b9955fea67bd5b3c202ade05697497d521f9 Mon Sep 17 00:00:00 2001 From: Sebastian Ullrich Date: Mon, 15 Oct 2018 10:21:08 +0200 Subject: [PATCH] perf(library/init/lean/parser/term): index term parsers by leading token 66% speedup on core.lean --- library/init/lean/elaborator.lean | 9 +++- library/init/lean/parser/basic.lean | 24 ++++++++-- library/init/lean/parser/pratt.lean | 4 +- library/init/lean/parser/term.lean | 71 +++++++++++++++++------------ library/init/lean/parser/token.lean | 13 ++++++ 5 files changed, 83 insertions(+), 38 deletions(-) diff --git a/library/init/lean/elaborator.lean b/library/init/lean/elaborator.lean index e0baf4e6a4..eb1fe916ca 100644 --- a/library/init/lean/elaborator.lean +++ b/library/init/lean/elaborator.lean @@ -96,12 +96,17 @@ do -- build and register parser | _ := throw "register_notation_parser: unimplemented", pure $ psym::ptrans.to_monad ), + first_rule::_ ← pure spec.rules | throw "register_notation_parser: unreachable", + first_tk ← match first_rule.symbol with + | notation_symbol.view.quoted {symbol := some a ..} := + pure a.val.trim + | _ := throw "register_notation_parser: unreachable", let ps := ps.bind id, cfg ← match spec.prefix_arg with | none := pure {cfg with leading_term_parsers := - parser.combinators.node k ps::cfg.leading_term_parsers} + cfg.leading_term_parsers.insert first_tk $ parser.combinators.node k ps} | some _ := pure {cfg with trailing_term_parsers := - parser.combinators.node k (read::ps.map coe)::cfg.trailing_term_parsers}, + cfg.trailing_term_parsers.insert first_tk $ parser.combinators.node k (read::ps.map coe)}, pure cfg /-- Recreate `elaborator_state.parser_cfg` from the elaborator state and the initial config, diff --git a/library/init/lean/parser/basic.lean b/library/init/lean/parser/basic.lean index 7fac098191..b46919001b 100644 --- a/library/init/lean/parser/basic.lean +++ b/library/init/lean/parser/basic.lean @@ -191,11 +191,29 @@ abbreviation trailing_term_parser := trailing_term_parser_m syntax instance trailing_term_parser_coe : has_coe term_parser trailing_term_parser := ⟨λ x _, x⟩ +local attribute [instance] name.has_lt_quick +/-- A multimap indexed by tokens. Used for indexing parsers by their leading token. -/ +def token_map (α : Type) := rbmap name (list α) (<) + +def token_map.insert {α : Type} (map : token_map α) (k : name) (v : α) : token_map α := +match map.find k with +| none := map.insert k [v] +| some vs := map.insert k (v::vs) + +def token_map.of_list {α : Type} : list (name × α) → token_map α +| [] := mk_rbmap _ _ _ +| (⟨k,v⟩::xs) := (token_map.of_list xs).insert k v + +instance token_map_nil.tokens : parser.has_tokens $ @token_map.of_list ρ [] := +default _ +instance token_map_cons.tokens (k : name) (r : ρ) (rs : list (name × ρ)) [parser.has_tokens r] [parser.has_tokens $ token_map.of_list rs] : + parser.has_tokens $ token_map.of_list ((k,r)::rs) := +⟨tokens r ++ tokens (token_map.of_list rs)⟩ + -- This needs to be a separate structure since `term_parser`s cannot contain themselves in their config --- TODO: we need indexing here. structure command_parser_config extends parser_config := -(leading_term_parsers : list term_parser) -(trailing_term_parsers : list trailing_term_parser) +(leading_term_parsers : token_map term_parser) +(trailing_term_parsers : token_map trailing_term_parser) instance command_parser_config_coe_parser_config : has_coe command_parser_config parser_config := ⟨command_parser_config.to_parser_config⟩ diff --git a/library/init/lean/parser/pratt.lean b/library/init/lean/parser/pratt.lean index e0a06977b5..9f487fc02d 100644 --- a/library/init/lean/parser/pratt.lean +++ b/library/init/lean/parser/pratt.lean @@ -18,9 +18,7 @@ local notation `m` := rec_t nat syntax base_m local notation `parser` := m syntax def curr_lbp : m nat := -do st ← get, - -- suppress error messages - except.ok tk ← (monad_lift $ observing $ lookahead token) <* put st | pure 0 <* put st, +do except.ok tk ← monad_lift peek_token | pure 0, match tk with | syntax.atom ⟨_, sym⟩ := do cfg ← read, diff --git a/library/init/lean/parser/term.lean b/library/init/lean/parser/term.lean index fd5066548c..09e845d09f 100644 --- a/library/init/lean/parser/term.lean +++ b/library/init/lean/parser/term.lean @@ -53,7 +53,11 @@ node! hole [hole: symbol "_" max_prec] @[derive parser.has_tokens parser.has_view] def sort.parser : term_parser := -node_choice! sort {"Sort":max_prec, "Type":max_prec} +node! sort ["Sort":max_prec] + +@[derive parser.has_tokens parser.has_view] +def type.parser : term_parser := +node! type ["Type":max_prec] section binder @[derive has_tokens has_view] @@ -289,27 +293,33 @@ node! anonymous_inaccessible ["._":max_prec] -- TODO(Sebastian): replace with attribute @[derive has_tokens] -def builtin_leading_parsers : list term_parser := [ - ident_univs.parser, - number.parser, - string_lit.parser, - paren.parser, - hole.parser, - sort.parser, - lambda.parser, - pi.parser, - anonymous_constructor.parser, - explicit_ident.parser, - let.parser, - have.parser, - show.parser, - assume.parser, - match.parser, - if.parser, - struct_inst.parser, - subtype.parser, - inaccessible.parser, - anonymous_inaccessible.parser +def builtin_leading_parsers : token_map term_parser := token_map.of_list [ + (ident.name, ident_univs.parser), + (number.name, number.parser), + (string_lit.name, string_lit.parser), + ("(", paren.parser), + ("_", hole.parser), + ("Sort", sort.parser), + ("Type", type.parser), + ("λ", lambda.parser), + ("fun", lambda.parser), + ("Π", pi.parser), + ("Pi", pi.parser), + ("∀", pi.parser), + ("forall", pi.parser), + ("⟨", anonymous_constructor.parser), + ("@", explicit_ident.parser), + ("@@", explicit_ident.parser), + ("let", let.parser), + ("have", have.parser), + ("show", show.parser), + ("assume", assume.parser), + ("match", match.parser), + ("if", if.parser), + ("{", struct_inst.parser), + ("{", subtype.parser), + (".(", inaccessible.parser), + ("._", anonymous_inaccessible.parser) ] @[derive parser.has_tokens parser.has_view] @@ -327,8 +337,6 @@ node! arrow [dom: get_leading, op: unicode_symbol "→" "->" 25, range: term.par @[derive parser.has_view] def projection.parser : trailing_term_parser := -/- Use max_prec + 1 so that it bind more tightly than application: - `a (b).c` should be parsed as `a ((b).c)`. -/ try $ node! projection [ term: get_leading, -- do not consume trailing whitespace @@ -341,25 +349,28 @@ try $ node! projection [ -- register '.' manually because of `raw_str` instance projection.tokens : has_tokens projection.parser := -⟨[{«prefix» := ".", lbp := max_prec}]⟩ +/- Use max_prec + 1 so that it bind more tightly than application: + `a (b).c` should be parsed as `a ((b).c)`. -/ +⟨[{«prefix» := ".", lbp := max_prec.succ}]⟩ @[derive has_tokens] -def builtin_trailing_parsers : list trailing_term_parser := [ - arrow.parser, - projection.parser +def builtin_trailing_parsers : token_map trailing_term_parser := token_map.of_list [ + ("→", arrow.parser), + ("->", arrow.parser), + (".", projection.parser) ] end term def term_parser.run (p : term_parser) : command_parser := do cfg ← read, - let trailing : trailing_term_parser := (longest_match cfg.trailing_term_parsers) <|> + let trailing : trailing_term_parser := (indexed cfg.trailing_term_parsers >>= longest_match) <|> -- The application parsers should only be tried as a fall-back; -- e.g. `a + b` should not be parsed as `a (+ b)`. --TODO(Sebastian): We should be able to remove this workaround using -- the proposed more robust precedence handling any_of [term.sort_app.parser, term.app.parser], - adapt_reader coe $ pratt_parser (longest_match cfg.leading_term_parsers) trailing p + adapt_reader coe $ pratt_parser (indexed cfg.leading_term_parsers >>= longest_match) trailing p end parser end lean diff --git a/library/init/lean/parser/token.lean b/library/init/lean/parser/token.lean index 961b87c45e..da8e12755a 100644 --- a/library/init/lean/parser/token.lean +++ b/library/init/lean/parser/token.lean @@ -185,6 +185,10 @@ do it ← left_over, pure tk ) +def peek_token : basic_parser_m (except (parsec.message syntax) syntax) := +do st ← get, + observing (try (lookahead token)) <* put st + variable [monad_basic_parser m] def symbol (sym : string) (lbp := 0) : parser := @@ -303,5 +307,14 @@ lift $ any_of [symbol unicode lbp, symbol ascii lbp] -- use unicode variant by default instance unicode_symbol.view_default (u a lbp) : parser.has_view_default (unicode_symbol u a lbp : parser) _ (syntax.atom ⟨none, u⟩) := ⟨⟩ +def indexed {α : Type} (map : token_map α) : m (list α) := +lift $ do + except.ok tk ← peek_token | error "", + n ← match tk with + | syntax.atom ⟨_, s⟩ := pure $ mk_simple_name s + | syntax.node ⟨some k, _⟩ := pure k.name + | _ := error "", + option.to_monad $ map.find n + end «parser» end lean